def normGoogleProfile(jresult):
    """Normalize a raw Google profile JSON dict into the common profile schema.

    Expected jresult fields (per the Google profile payload):
    placesLived(value, primary), name(givenName, familyName), displayName,
    circledByCount, occupation, aboutMe, organizations(name, title, endDate,
    startDate), gender.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang.
    """
    # NOTE(review): this function is defined twice in this file with identical
    # bodies; the later definition is the one Python keeps. Remove one copy.
    profile = dict()
    # Tolerate a missing/partial "name" object instead of raising KeyError,
    # consistent with the .get() defaults used for every other field below.
    name = jresult.get("name", {})
    profile["name"] = (name.get("givenName", "") + " " + name.get("familyName", "")).lower().strip()
    profile["displayName"] = jresult.get("displayName", "").lower().strip()
    profile["placesLived"] = jresult.get("placesLived", list())
    profile["circledByCount"] = jresult.get("circledByCount", 0)
    profile["tags"] = getGoogleTag(jresult)
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    return profile
def normTwitterProfile(jresult):
    """Normalize a raw Twitter user JSON dict into the common profile schema.

    Expected jresult fields: location, name, screen_name, lang,
    followers_count, description.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang (same schema as normGoogleProfile).
    """
    profile = dict()
    # Use .get() defaults throughout so a sparse payload does not raise
    # KeyError (the original mixed direct indexing with .get() defaults).
    profile["name"] = jresult.get("name", "").lower().strip()
    profile["displayName"] = jresult.get("screen_name", "").lower().strip()
    profile["placesLived"] = [{"value": jresult.get("location", ""), "primary": True}]
    profile["circledByCount"] = jresult.get("followers_count", 0)
    profile["tags"] = getStringTag(jresult.get("description", ""))
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    # print(profile)
    return profile
def normGoogleProfile(jresult):
    """Normalize a raw Google profile JSON dict into the common profile schema.

    Expected jresult fields (per the Google profile payload):
    placesLived(value, primary), name(givenName, familyName), displayName,
    circledByCount, occupation, aboutMe, organizations(name, title, endDate,
    startDate), gender.

    Returns a dict with keys: name, displayName, placesLived, circledByCount,
    tags, nameLang, displayNameLang.
    """
    # NOTE(review): this function is defined twice in this file with identical
    # bodies; the later definition is the one Python keeps. Remove one copy.
    profile = dict()
    # Tolerate a missing/partial "name" object instead of raising KeyError,
    # consistent with the .get() defaults used for every other field below.
    name = jresult.get("name", {})
    profile["name"] = (name.get("givenName", "") + " " + name.get("familyName", "")).lower().strip()
    profile["displayName"] = jresult.get("displayName", "").lower().strip()
    profile["placesLived"] = jresult.get("placesLived", list())
    profile["circledByCount"] = jresult.get("circledByCount", 0)
    profile["tags"] = getGoogleTag(jresult)
    profile["nameLang"] = ut.detectLang(profile["name"])
    profile["displayNameLang"] = ut.detectLang(profile["displayName"])
    return profile
def normGoogleWall(jresult):
    """Normalize paginated Google wall/activity JSON into a list of posts.

    jresult is expected to be a list of result pages, each a dict with an
    "items" list of activity posts; anything that is not a list yields [].
    Processing stops once page_count exceeds 10 (original comment: revise
    the size in the future).

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    page_count = 0
    # isinstance() instead of type() == list (handles subclasses, idiomatic).
    if isinstance(jresult, list):
        for page in jresult:
            if page_count > 10:  # revise the size in the future
                break
            for post in page["items"]:
                published_time = formatGoogleTime(post["published"])
                place = formatGooglePlace(post.get("location", ""), 2)
                info = post.get("object", "")
                # Keep the explicit != "" test: only the .get() default means
                # "absent"; an empty dict object would still be processed.
                if info != "":
                    text = info.get("content", "")
                    urls = getGoogleUrls(info.get("attachments", ""))
                    lang = ut.detectLang(text)
                    text_en = ut.translate(text, lang)
                    sentiment = ut.getSentiment(text_en)
                    topic_distri = ut.getTopic(text_en)
                    tf = ut.wordProcess(text, lang)
                    posts.append(getPost(text, text_en, published_time, place,
                                         urls, lang, sentiment, topic_distri, tf))
            # NOTE(review): counted once per page (matches the variable name
            # and the page-cap break above) — confirm against the original
            # intent, the source formatting was ambiguous here.
            page_count += 1
    return posts
def normGoogleWall(jresult):
    """Normalize paginated Google wall/activity JSON into a list of posts.

    jresult is expected to be a list of result pages, each a dict with an
    "items" list of activity posts; anything that is not a list yields [].
    Processing stops once page_count exceeds 10 (original comment: revise
    the size in the future).

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    page_count = 0
    # isinstance() instead of type() == list (handles subclasses, idiomatic).
    if isinstance(jresult, list):
        for page in jresult:
            if page_count > 10:  # revise the size in the future
                break
            for post in page["items"]:
                published_time = formatGoogleTime(post["published"])
                place = formatGooglePlace(post.get("location", ""), 2)
                info = post.get("object", "")
                # Keep the explicit != "" test: only the .get() default means
                # "absent"; an empty dict object would still be processed.
                if info != "":
                    text = info.get("content", "")
                    urls = getGoogleUrls(info.get("attachments", ""))
                    lang = ut.detectLang(text)
                    text_en = ut.translate(text, lang)
                    sentiment = ut.getSentiment(text_en)
                    topic_distri = ut.getTopic(text_en)
                    tf = ut.wordProcess(text, lang)
                    posts.append(getPost(text, text_en, published_time, place,
                                         urls, lang, sentiment, topic_distri, tf))
            # NOTE(review): counted once per page (matches the variable name
            # and the page-cap break above) — confirm against the original
            # intent, the source formatting was ambiguous here.
            page_count += 1
    return posts
def normTwitterWall(wall):
    """Normalize a list of raw tweet dicts into a list of post dicts.

    For each tweet: extracts text, creation time, place and urls; detects the
    language when the payload's "lang" field is empty; translates to English
    and derives sentiment, topic distribution and term frequencies.

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    for post in wall:
        text = post.get("text", "")
        # Renamed from `time` to avoid shadowing the stdlib time module
        # (referenced elsewhere in this file as time.time()).
        created_time = formatTwitterTime(post.get("created_at"))
        place = formatTwitterPlace(post["geo"], 2)
        urls = getTwitterUrls(post)
        lang = post.get("lang", "")
        if lang == "":
            lang = ut.detectLang(text)
        # translate text
        text_en = ut.translate(text, lang)
        sentiment = ut.getSentiment(text_en)
        topic_distri = ut.getTopic(text_en)
        tf = ut.wordProcess(text, lang)
        posts.append(getPost(text, text_en, created_time, place, urls, lang,
                             sentiment, topic_distri, tf))
    return posts
def normTwitterWall(wall):
    """Normalize a list of raw tweet dicts into a list of post dicts.

    For each tweet: extracts text, creation time, place and urls; detects the
    language when the payload's "lang" field is empty; translates to English
    and derives sentiment, topic distribution and term frequencies.

    Returns a list of post dicts built by getPost().
    """
    # NOTE(review): this function is defined twice in this file with identical
    # logic; the later definition is the one Python keeps. Remove one copy.
    posts = list()
    for post in wall:
        text = post.get("text", "")
        # Renamed from `time` to avoid shadowing the stdlib time module
        # (referenced elsewhere in this file as time.time()).
        created_time = formatTwitterTime(post.get("created_at"))
        place = formatTwitterPlace(post["geo"], 2)
        urls = getTwitterUrls(post)
        lang = post.get("lang", "")
        if lang == "":
            lang = ut.detectLang(text)
        # translate text
        text_en = ut.translate(text, lang)
        sentiment = ut.getSentiment(text_en)
        topic_distri = ut.getTopic(text_en)
        tf = ut.wordProcess(text, lang)
        posts.append(getPost(text, text_en, created_time, place, urls, lang,
                             sentiment, topic_distri, tf))
    return posts
def getStringTag(string):
    """Return the processed word tokens of *string* as a list of tags.

    Detects the string's language, runs the word processor on it, and
    returns the resulting term keys.
    """
    detected_lang = ut.detectLang(string)
    term_freq = ut.wordProcess(string, detected_lang)
    # Iterating a dict yields its keys, so this equals list(term_freq.keys()).
    return list(term_freq)