Example #1
0
def is_new_account_bot(status):
    """Heuristically flag a status whose author looks like a newly created bot.

    A suspicion score is accumulated from the author's profile fields
    (default-avatar check, screen-name pattern, account creation year,
    location, tweet/friend/follower counts and the script of the display
    name). The account is flagged when the final score is positive.

    Args:
        status: Mapping describing one status (presumably a Twitter API
            status payload -- confirm against callers). Its ``"user"``
            entry must provide ``screen_name``, ``name``,
            ``statuses_count``, ``friends_count``, ``followers_count``,
            ``created_at`` and ``location``.

    Returns:
        ``True`` if the suspicion score is greater than zero, ``False``
        otherwise, or ``None`` when ``status`` has no ``"user"`` entry.
    """
    # Guard before touching any helper: without a user there is nothing to
    # score, and the helpers below should not be run on such a status.
    if "user" not in status:
        return None

    user = status["user"]
    screen_name = user["screen_name"]
    display_name = user["name"]
    tweets = user["statuses_count"]
    friends = user["friends_count"]
    followers = user["followers_count"]
    location = user["location"]
    created_year = int(twitter_time_to_object(user["created_at"]).strftime("%Y"))

    detector = AlphabetDetector()
    susp_score = 0

    # --- Profile-level signals -------------------------------------------
    if is_egg(status):
        susp_score += 50
    if is_bot_name(screen_name):
        susp_score += 100
    if created_year < 2017:
        # Accounts created before 2017 are not "new"; weigh heavily against.
        susp_score -= 300
    # Truthiness instead of len(): also tolerates a None location, which
    # len() would reject with a TypeError.
    if location:
        susp_score -= 150
    if len(screen_name) == 15:
        susp_score += 100

    # --- Activity counters -----------------------------------------------
    if tweets == 0:
        susp_score += 50
    if tweets > 0:
        susp_score -= 50
    if tweets > 20:
        susp_score -= 100

    if friends == 21:
        # Exactly 21 friends is treated as a strong bot indicator.
        susp_score += 100
    else:
        susp_score -= 50
    if friends == 0:
        susp_score += 50
    if friends > 40:
        susp_score -= 100
    if friends > 100:
        susp_score -= 100

    if followers == 0:
        susp_score += 50
    if followers > 0:
        susp_score -= 200

    # --- Display-name signals --------------------------------------------
    if len(display_name) < 3:
        susp_score += 100
    if detector.only_alphabet_chars(display_name, "CYRILLIC"):
        susp_score += 200
    if detector.only_alphabet_chars(display_name, "ARABIC"):
        susp_score += 200
    if detector.is_cjk(display_name):
        susp_score += 200
    if detector.only_alphabet_chars(display_name, "LATIN"):
        susp_score -= 100

    return susp_score > 0
Example #2
0
            # NOTE(review): fragment -- the enclosing loop/function header and the
            # definitions of lineSplit, value, transcr, english and dictionary
            # are outside this view; confirm against the full file.
            # The second field of the split line is used as the dictionary key.
            key = lineSplit[1]
            # Append the two per-line items to `value` (presumably a
            # transcription and an English gloss -- verify).
            value.append(transcr)
            value.append(english)
            if key not in dictionary:
                # First time this key is seen: store the `value` list itself.
                dictionary[key] = value
            else:
                # Key already present: append the first two items of `value`
                # to the list stored for that key.
                dictionary[key].append(value[0])
                dictionary[key].append(value[1])
    
# Read stal.xml and collect the text nodes of every <se> element under <body>.
with open('stal.xml', 'r', encoding='utf-8') as t:
    xml = t.read()
    tree = lxml.html.fromstring(xml)
    sentences = tree.xpath('.//body/se/text()')

# Append one <se> element per sentence to `text`. Sentences for which
# ad.is_cjk() is true are stripped of punctuation and handed to markdown();
# every other sentence is emitted verbatim with lang="ru".
for i in sentences:
    sent = '<se>'
    if ad.is_cjk(i):
        # Drop every character listed in `punct` in a single pass.
        cleanstr = ''.join(letter for letter in i if letter not in punct)
        # NOTE(review): the return value of markdown() is discarded, so unless
        # markdown() updates module state (e.g. `sent`) itself, the emitted
        # element is an empty '<se></se>' -- confirm against its definition.
        markdown(cleanstr)
        sent += '</se>\n'
        text += sent
    else:
        text += '<se lang="ru">' + i + '</se>\n'