Example #1
def load_user_all(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    count = 0
    for name, group in detector.get_user_info().iteritems():
        count += 1
        if group['screen_name'] == 'LuskRoyster':
            print count
            exit()
    return detector.get_user_info()
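Example #1 above uses Python 2 idioms (print statements, dict.iteritems()). A minimal Python 3 sketch of the same lookup, assuming the same detect.SpamDetector interface and the same 'LuskRoyster' target, might look like this:
def load_user_all_py3(prefix, target='LuskRoyster'):
    # Same logic as Example #1, written for Python 3: items() instead of
    # iteritems(), print() as a function, and a return instead of exit().
    detector = detect.SpamDetector(prefix=prefix)
    user_info = detector.get_user_info()
    count = 0
    for name, group in user_info.items():
        count += 1
        if group['screen_name'] == target:
            print(count)
            break
    return user_info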
Example #2
def load_user(prefix, group_id):
    full_prefix = util.get_full_prefix(prefix)
    detector = detect.SpamDetector(prefix=full_prefix)
    group = detector.get_spam_group()
    user_info = detector.get_user_info()
    #alluser = set([])
    id_count = 1
    for g in group:
        if id_count == group_id:
            return group[g]['spam_user'], user_info
        id_count += 1
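A hedged usage sketch for load_user above. The shape of user_info is an assumption drawn from Example #1 (entries carrying a 'screen_name' field), and the keys may be user ids stored as either ints or strings:
def print_group_screen_names(prefix, group_id):
    # load_user falls through and returns None when group_id is larger
    # than the number of detected groups, so guard against that first.
    result = load_user(prefix, group_id)
    if result is None:
        print('no group with id %d' % group_id)
        return
    spam_user, user_info = result
    for uid in spam_user:
        entry = user_info.get(uid) or user_info.get(str(uid))
        if entry:
            print(entry['screen_name'])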
Example #3
def load_user_screenname_custom(prefix, percent_same=0.2):
    detector = detect.SpamDetector(prefix=prefix,
                                   url_based=True,
                                   sourcefile=None,
                                   collect_url_only=False)
    spam_group = detector.parameter_sweeping(min_duplicate_factor=3,
                                             percent_same=percent_same,
                                             return_all=True)
    spam_user = set([])
    for value in spam_group.values():
        spam_user = spam_user.union(set(value['screen_name']))
    print len(spam_user)
    return spam_user
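Because load_user_screenname_custom exposes percent_same as a tuning knob, the short sketch below sweeps a few values and compares the size of the flagged-user set; the particular 0.1-0.4 range is our choice for illustration, not something the example prescribes:
def sweep_percent_same(prefix, values=(0.1, 0.2, 0.3, 0.4)):
    # Re-run the custom detection at several percent_same thresholds and
    # record how many distinct screen names each threshold flags.
    counts = {}
    for p in values:
        users = load_user_screenname_custom(prefix, percent_same=p)
        counts[p] = len(users)
        print('percent_same=%.2f -> %d flagged users' % (p, counts[p]))
    return counts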
Example #4
def load_user(prefix, group_id):
    full_prefix = util.get_full_prefix(prefix)
    detector = detect.SpamDetector(prefix=full_prefix)
    group = detector.get_spam_group()
    user_info = detector.get_user_info()
    #alluser = set([])
    #print '[IN load_user], group_id is %d' %(group_id)
    #print '[IN load_user], length of group is %d' %(len(group))
    id_count = 1
    for g in group:
        #print 'current group_id is %d' %(id_count)
        if id_count == group_id:
            return group[g]['spam_user'], user_info
        id_count += 1
Example #5
def spam_group_pie_chart_miniplot_new():
    convert = {
        'bit': 2.40,
        'ift': 2.56,
        'ow': 3.15,
        'tinyurl': 3.52,
        'dld': 4.41,
        'viid': 4.95,
        'goo': 6.31,
        'dlvr': 7.90,
        'lnis': 23.07
    }
    all_labels = []
    all_sizes = []
    titles = []
    dic = json.load(open('spam_category.json', 'r'))
    for entry in sorted(dic):
        labels = []
        sizes = []
        prefix = entry.keys()[0]
        titles.append(prefix + ' (' + str(convert[prefix]) + '%)')

        full_prefix = get_full_prefix(prefix)
        detector = detect.SpamDetector(prefix=full_prefix)
        group = detector.get_spam_group()
        for category in [
                'malware', 'bot followers', 'p**n', 'news bot', 'click bait',
                'Quran verses', 'other'
        ]:
            #if entry[prefix][category]["spam_group_id"] != []:
            if "promotion" in category:
                labels.append("click bait")
            else:
                labels.append(category)
            total = set([])
            for _id_ in entry[prefix][category]['spam_group_id']:
                user_to_add = set(group[group.keys()[_id_ - 1]]['spam_user'])
                total.update(user_to_add)
                print len(total)
            sizes.append(len(total))
        all_labels.append(labels)
        all_sizes.append(sizes)
    print all_labels
    print all_sizes
    print titles
    timeline_new.plot_pie_miniplot(labels=all_labels,
                                   sizes=all_sizes,
                                   titles=titles,
                                   filename=None)
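The chart code above only works if spam_category.json has a very specific shape. Judging from the access pattern (entry.keys()[0], then entry[prefix][category]['spam_group_id']), the file is presumably a list of single-key objects, one per shortener prefix; note also that sorted(dic) over a list of dicts only works under Python 2. The group ids below are made up to show the structure, they are not real data:
# Hypothetical shape of spam_category.json, reconstructed from how
# spam_group_pie_chart_miniplot_new() indexes into it.
example_spam_category = [
    {
        'bit': {
            'malware':       {'spam_group_id': [1, 4]},
            'bot followers': {'spam_group_id': [2]},
            'p**n':          {'spam_group_id': []},
            'news bot':      {'spam_group_id': [3]},
            'click bait':    {'spam_group_id': [5, 6]},
            'Quran verses':  {'spam_group_id': []},
            'other':         {'spam_group_id': [7]},
        }
    },
    # ... one single-key dict per prefix ('tinyurl', 'goo', 'dlvr', ...)
]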
Example #6
def load_user(prefix):
    full_prefix = util.get_full_prefix(prefix)
    detector = detect.SpamDetector(prefix=full_prefix)
    group = detector.get_spam_group()
    """
	Run those two lines of code of url info file does not exist
	
	url_info = detector.get_url_per_user()
	json.dump(url_info, open('metadata/'+prefix+'_user_url_dictionary.json','w'))
	"""

    url_info = json.load(
        open('metadata/' + prefix + '_user_url_dictionary.json', 'r'))
    print len(url_info)

    #alluser = set([])
    # id_count = 1

    for index, g in enumerate(group):
        unique_url = set([])
        for user in group[g]['spam_user']:
            for url in url_info[str(user)]:
                if prefix in url:
                    unique_url.add(url)
                else:
                    if 'twitter.com' in url:
                        if url in cache:
                            print 'find url in cache'
                            unique_url.add(cache[url])
                        else:
                            try:
                                print url
                                new_url = extract_url_from_twitter_page(
                                    url, prefix)
                                if new_url:
                                    unique_url.add(new_url)
                            except Exception, e:
                                print e
                                time.sleep(2)

            #unique_url = unique_url.union(set(url_info[str(user)]))
            #print len(unique_url)

        #pprint.pprint(unique_url)
        group[g]['unique_url'] = list(unique_url)
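load_user above reads from a module-level cache that maps already-resolved twitter.com links to their expanded targets; neither that cache nor extract_url_from_twitter_page is defined in these examples. A purely hypothetical sketch of keeping such a cache on disk between runs (the file name and helper names are invented for illustration):
import json
import os

URL_CACHE_FILE = 'metadata/twitter_url_cache.json'  # hypothetical path

def load_url_cache():
    # Return the url -> expanded-url mapping if it was saved before,
    # otherwise start with an empty cache.
    if os.path.isfile(URL_CACHE_FILE):
        return json.load(open(URL_CACHE_FILE, 'r'))
    return {}

def save_url_cache(cache):
    # Persist the cache so repeated runs skip already-resolved pages.
    json.dump(cache, open(URL_CACHE_FILE, 'w'))

cache = load_url_cache()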
Example #7
def get_tweet_of_suspended_user(filename):
    status = pickle.load(open(filename, 'rb'))
    for k, v in status.iteritems():
        count = 0
        if 'trump' in k:
            suspended = v['suspended']
            detector = detect.SpamDetector(prefix=k)
            result = detector.get_tweet_from_user(suspended,
                                                  field='screen_name')
            print len(result)
            for tweet in result:
                if 'StylishRentals' in tweet['text']:
                    count += 1
                    print tweet['text'].encode('utf-8', 'ignore')
                    #print tweet['user']['id']
                    #print tweet['user']['screen_name']
                    #print tweet['created_at']
            print count
Example #8
def get_and_store_status(filename):
    if os.path.isfile(filename):
        print 'file exists'
        return
    dic = {}
    #['bit', 'tinyurl', 'lnis', 'viid', 'goo', 'dld', 'ift', 'dlvr', 'ow']
    for prefix in [
            'bit', 'tinyurl', 'lnis', 'viid', 'goo', 'dld', 'ift', 'dlvr', 'ow'
    ]:
        print prefix
        full_prefix = util.get_full_prefix(prefix)
        detector = detect.SpamDetector(prefix=full_prefix)
        user = detector.get_spam_user_info(variable='screen_name')
        #user = ['WuerzRodrigo', 'reed_schepens']
        #user = ['InceZehraince3', 'noexistingasdf123', 'zhouhanchen', 'NBA76ersFans']
        #user = list(user)[:2]
        print len(user)
        dic[prefix] = check_status(user)
    json.dump(dic, open(filename, 'w'))
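check_status is not shown in these examples; from get_tweet_of_suspended_user above, the stored result apparently has at least a 'suspended' bucket per prefix. A purely hypothetical stand-in built on the requests library is sketched below; the real helper may well use the Twitter API instead:
import time
import requests

def check_status(users):
    # Hypothetical stand-in: probe each public profile page and bucket the
    # account by the HTTP status. The real check_status may distinguish
    # suspended from deleted accounts more precisely.
    status = {'suspended': [], 'active': [], 'unknown': []}
    for name in users:
        try:
            r = requests.get('https://twitter.com/' + name, timeout=10)
            if r.status_code == 200:
                status['active'].append(name)
            elif r.status_code in (403, 404):
                status['suspended'].append(name)
            else:
                status['unknown'].append(name)
        except requests.RequestException:
            status['unknown'].append(name)
        time.sleep(1)  # avoid hammering the endpoint
    return status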
Example #9
def load_user(prefix):
    full_prefix = prefix + '/' + prefix + '_tweet_'
    detector = detect.SpamDetector(prefix=full_prefix)
    return detector.get_spam_user_info(variable='screen_name')
Example #10
def load_user(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    return detector.get_spam_group()
Example #11
def load_user_screenname(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    return detector.get_spam_user_info('screen_name')
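Taken together, the loaders differ mainly in what they return: Example #10 hands back the raw spam groups, while Example #11 returns only the flagged screen names. A short sketch contrasting the two on one prefix (using the Example #10 definition of load_user; the summary line is ours):
def summarize(prefix):
    # Group-level view vs. user-level view of the same detection run.
    groups = load_user(prefix)             # Example #10: spam group dict
    names = load_user_screenname(prefix)   # Example #11: flagged screen names
    print('%s: %d spam groups, %d flagged screen names'
          % (prefix, len(groups), len(names)))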