# Project-local modules: detect, util, and timeline_new ship with this repo.
import json
import os
import pickle
import time

import detect
import timeline_new
import util
from util import get_full_prefix  # called unqualified in the pie-chart helper


def load_user_all(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    user_info = detector.get_user_info()
    # Debug scan: report how far into the user list a specific account
    # appears, then abort. Left in from the original debugging session.
    count = 0
    for name, group in user_info.iteritems():
        count += 1
        if group['screen_name'] == 'LuskRoyster':
            print count
            exit()
    return user_info
def load_user(prefix, group_id):
    full_prefix = util.get_full_prefix(prefix)
    detector = detect.SpamDetector(prefix=full_prefix)
    group = detector.get_spam_group()
    user_info = detector.get_user_info()
    # group_id selects the N-th group in iteration order (1-indexed).
    id_count = 1
    for g in group:
        if id_count == group_id:
            return group[g]['spam_user'], user_info
        id_count += 1
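
# A hypothetical usage sketch (the prefix and group id are illustrative,
# not from the original): pull the spam users of the third detected group.
def example_third_group():
    spam_user, user_info = load_user('bit', group_id=3)
    print '%d spam users in group 3' % len(spam_user)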
def load_user_screenname_custom(prefix, percent_same=0.2):
    detector = detect.SpamDetector(prefix=prefix, url_based=True,
                                   sourcefile=None, collect_url_only=False)
    spam_group = detector.parameter_sweeping(min_duplicate_factor=3,
                                             percent_same=percent_same,
                                             return_all=True)
    # Union of screen names across every detected spam group.
    spam_user = set([])
    for value in spam_group.values():
        spam_user = spam_user.union(set(value['screen_name']))
    print len(spam_user)
    return spam_user
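
# A minimal sweep sketch (not in the original): vary the percent_same
# threshold to see how the flagged-user count responds; 'bit' is an
# assumed prefix.
def sweep_percent_same(prefix='bit'):
    for threshold in [0.1, 0.2, 0.3, 0.4, 0.5]:
        users = load_user_screenname_custom(prefix, percent_same=threshold)
        print threshold, len(users)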
def spam_group_pie_chart_miniplot_new():
    # Share of spam tweets per URL-shortener prefix (percent), shown in titles.
    convert = {'bit': 2.40, 'ift': 2.56, 'ow': 3.15, 'tinyurl': 3.52,
               'dld': 4.41, 'viid': 4.95, 'goo': 6.31, 'dlvr': 7.90,
               'lnis': 23.07}
    all_labels = []
    all_sizes = []
    titles = []
    # spam_category.json: entries mapping one prefix to its per-category
    # spam group ids.
    dic = json.load(open('spam_category.json', 'r'))
    for entry in sorted(dic):
        labels = []
        sizes = []
        prefix = entry.keys()[0]
        titles.append(prefix + ' (' + str(convert[prefix]) + '%)')
        full_prefix = get_full_prefix(prefix)
        detector = detect.SpamDetector(prefix=full_prefix)
        group = detector.get_spam_group()
        for category in ['malware', 'bot followers', 'p**n', 'news bot',
                         'click bait', 'Quran verses', 'other']:
            # Normalize the legacy "promotion" label to "click bait".
            if 'promotion' in category:
                labels.append('click bait')
            else:
                labels.append(category)
            # Union of spam users over every group assigned to this category.
            total = set([])
            for _id_ in entry[prefix][category]['spam_group_id']:
                total.update(set(group[group.keys()[_id_ - 1]]['spam_user']))
            sizes.append(len(total))
        all_labels.append(labels)
        all_sizes.append(sizes)
    timeline_new.plot_pie_miniplot(labels=all_labels, sizes=all_sizes,
                                   titles=titles, filename=None)
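
# timeline_new.plot_pie_miniplot is imported from elsewhere; below is a
# minimal matplotlib sketch of an equivalent grid of pie charts (the
# layout and signature here are assumptions, not the original function):
def plot_pie_miniplot_sketch(labels, sizes, titles, filename=None):
    import math
    import matplotlib.pyplot as plt
    cols = 3
    rows = int(math.ceil(len(titles) / float(cols)))
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows))
    for ax, pie_labels, pie_sizes, title in zip(axes.flat, labels, sizes,
                                                titles):
        ax.pie(pie_sizes, labels=pie_labels, autopct='%1.1f%%')
        ax.set_title(title)
    if filename:
        plt.savefig(filename)
    else:
        plt.show()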
def load_user(prefix):
    full_prefix = util.get_full_prefix(prefix)
    detector = detect.SpamDetector(prefix=full_prefix)
    group = detector.get_spam_group()
    # Run these two lines first if the URL-info file does not exist yet:
    #   url_info = detector.get_url_per_user()
    #   json.dump(url_info, open('metadata/' + prefix + '_user_url_dictionary.json', 'w'))
    url_info = json.load(
        open('metadata/' + prefix + '_user_url_dictionary.json', 'r'))
    print len(url_info)
    # Annotate each spam group with the unique shortened URLs its users posted.
    for g in group:
        unique_url = set([])
        for user in group[g]['spam_user']:
            for url in url_info[str(user)]:
                if prefix in url:
                    unique_url.add(url)
                elif 'twitter.com' in url:
                    # Resolve the shortened URL embedded in the tweet page;
                    # cache hits avoid refetching the same page.
                    if url in cache:
                        unique_url.add(cache[url])
                    else:
                        try:
                            new_url = extract_url_from_twitter_page(url, prefix)
                            if new_url:
                                unique_url.add(new_url)
                        except Exception, e:
                            print e
                            time.sleep(2)
        group[g]['unique_url'] = list(unique_url)
    return group
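
# 'cache' and extract_url_from_twitter_page are referenced above but not
# defined in this file. Below is a minimal sketch under the assumption
# that the helper scrapes a tweet permalink and returns the first link
# containing the shortener prefix (requests and BeautifulSoup are assumed
# dependencies, not confirmed by the original code):
cache = {}

def extract_url_from_twitter_page(url, prefix):
    import requests
    from bs4 import BeautifulSoup
    page = requests.get(url, timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    for link in soup.find_all('a'):
        target = link.get('data-expanded-url') or link.get('href') or ''
        if prefix in target:
            cache[url] = target  # memoize so repeat lookups skip the fetch
            return target
    return None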
def get_tweet_of_suspended_user(filename):
    status = pickle.load(open(filename, 'rb'))
    for k, v in status.iteritems():
        count = 0
        if 'trump' in k:
            suspended = v['suspended']
            detector = detect.SpamDetector(prefix=k)
            result = detector.get_tweet_from_user(suspended,
                                                  field='screen_name')
            print len(result)
            # Count (and show) the suspended users' tweets that mention a
            # known spam account.
            for tweet in result:
                if 'StylishRentals' in tweet['text']:
                    count += 1
                    print tweet['text'].encode('utf-8', 'ignore')
            print count
def get_and_store_status(filename):
    # Skip the (slow) status lookup if results are already on disk.
    if os.path.isfile(filename):
        print 'file exists'
        return
    dic = {}
    for prefix in ['bit', 'tinyurl', 'lnis', 'viid', 'goo', 'dld', 'ift',
                   'dlvr', 'ow']:
        print prefix
        full_prefix = util.get_full_prefix(prefix)
        detector = detect.SpamDetector(prefix=full_prefix)
        user = detector.get_spam_user_info(variable='screen_name')
        print len(user)
        dic[prefix] = check_status(user)
    json.dump(dic, open(filename, 'w'))
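
# check_status is called above but not defined in this file. A minimal
# sketch assuming it uses Tweepy's lookup_users (the api argument, the
# batching, and the output keys are assumptions, not the original code);
# accounts missing from the lookup are treated as suspended or deleted.
def check_status(users, api=None):
    # api: an authenticated tweepy.API instance supplied by the caller.
    users = list(users)
    found = set()
    for i in range(0, len(users), 100):  # lookup_users accepts <= 100 names
        try:
            for u in api.lookup_users(screen_names=users[i:i + 100]):
                found.add(u.screen_name)
        except Exception, e:
            print e
    return {'active': sorted(found),
            'suspended': sorted(set(users) - found)}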
def load_user(prefix):
    full_prefix = prefix + '/' + prefix + '_tweet_'
    detector = detect.SpamDetector(prefix=full_prefix)
    return detector.get_spam_user_info(variable='screen_name')
def load_user(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    return detector.get_spam_group()
def load_user_screenname(prefix):
    detector = detect.SpamDetector(prefix=prefix)
    return detector.get_spam_user_info('screen_name')
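
# Example driver (a sketch, not part of the original): compare how many
# accounts the screen-name loader flags across a few shortener prefixes.
if __name__ == '__main__':
    for prefix in ['bit', 'tinyurl', 'goo']:
        print prefix, len(load_user_screenname(prefix))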