def is_new_account_bot(status):
    """Heuristically decide whether a status comes from a newly created bot account.

    A suspicion score is accumulated from properties of ``status["user"]``:
    signals that make the account look automated add points, signals that
    make it look established subtract points. The account is reported as a
    bot when the final score is strictly positive.

    Args:
        status: Mapping describing a tweet. The ``"user"`` entry must provide
            ``screen_name``, ``name``, ``statuses_count``, ``friends_count``,
            ``followers_count``, ``created_at`` and ``location``.

    Returns:
        ``True`` when the score is greater than zero, otherwise ``False``.
        A status without a ``"user"`` entry cannot be scored and yields
        ``False``.
    """
    # Guard first, before any helper sees the payload: nothing below can
    # work without a user record.
    if "user" not in status:
        return False

    user = status["user"]
    sn = user["screen_name"]
    n = user["name"]
    tweets = user["statuses_count"]
    friends = user["friends_count"]
    followers = user["followers_count"]
    location = user["location"]
    time_obj = twitter_time_to_object(user["created_at"])
    created_year = int(time_obj.strftime("%Y"))

    ad = AlphabetDetector()
    susp_score = 0

    # Profile-level signals.
    if is_egg(status):
        susp_score += 50
    if is_bot_name(sn):
        susp_score += 100
    if created_year < 2017:
        susp_score -= 300
    # Truthiness check: a null location is treated the same as an empty one
    # instead of raising TypeError from len(None).
    if location:
        susp_score -= 150
    # NOTE(review): 15 is presumably the maximum screen-name length, hit by
    # auto-generated names - confirm.
    if len(sn) == 15:
        susp_score += 100

    # Activity: the thresholds are cumulative, so an account with more than
    # 20 tweets collects both deductions.
    if tweets == 0:
        susp_score += 50
    if tweets > 0:
        susp_score -= 50
    if tweets > 20:
        susp_score -= 100

    # Following count. NOTE(review): exactly 21 friends is singled out as
    # suspicious; presumably a default follow set - confirm.
    if friends == 21:
        susp_score += 100
    if friends == 0:
        susp_score += 50
    if friends != 21:
        susp_score -= 50
    if friends > 40:
        susp_score -= 100
    if friends > 100:
        susp_score -= 100

    # Follower count.
    if followers == 0:
        susp_score += 50
    if followers > 0:
        susp_score -= 200

    # Display-name signals: very short names and names written entirely in
    # Cyrillic, Arabic or CJK add to the score; all-Latin names reduce it.
    if len(n) < 3:
        susp_score += 100
    if ad.only_alphabet_chars(n, "CYRILLIC"):
        susp_score += 200
    if ad.only_alphabet_chars(n, "ARABIC"):
        susp_score += 200
    if ad.is_cjk(n):
        susp_score += 200
    if ad.only_alphabet_chars(n, "LATIN"):
        susp_score -= 100

    return susp_score > 0
# Store the transcription/translation pair under the key found in the second
# field of the current split line. The first time a key is seen the `value`
# list itself becomes the entry; afterwards its first two items are appended
# to the existing entry.
key = lineSplit[1]
value.append(transcr)
value.append(english)
if key in dictionary:
    dictionary[key].append(value[0])
    dictionary[key].append(value[1])
else:
    dictionary[key] = value

# Load the XML document and collect the text of every <se> element in <body>.
with open('stal.xml', 'r', encoding='utf-8') as t:
    xml = t.read()
tree = lxml.html.fromstring(xml)
sentences = tree.xpath('.//body/se/text()')

for i in sentences:
    s = ad.is_cjk(i)
    sent = '<se>'
    if s == True:
        # CJK sentence: drop every character listed in `punct`, then hand the
        # cleaned string to markdown().
        cleanstr = ''
        for letter in i:
            if letter in punct:
                continue
            cleanstr += letter
        # NOTE(review): markdown()'s return value is not used here, so only an
        # empty <se></se> pair is appended to `text` by this branch -
        # presumably markdown() emits its result elsewhere; confirm.
        markdown(cleanstr)
        sent += '</se>\n'
        text += sent
        continue
    # Any other sentence is copied through unchanged, tagged as Russian.
    tagged = f'<se lang="ru">{i}</se>\n'
    text += tagged