class CSVUtil(object):

    def __init__(self):
        self._fLoader = PyFileLoader()

    #@Arguments:
    #path = path string for a csv file
    #keyIndex = an integer with the column index to become the dictionary key,
    #           0 by default, i.e.: the first column of the csv file
    #Return: a dictionary whose keys are taken from the keyIndex column and
    #        whose values are lists of dictionaries, one per row with that key
    def loadCSV(self, path, keyIndex=0):
        path = path.lstrip("/").rstrip("\n")
        collection = {}
        try:
            data = self._fLoader.loadPath(path)
            n = 0
            header = ""
            for row in data:
                info = row.rstrip("\n").split(";")
                #Popping the value that will be the dictionary key
                key = info.pop(keyIndex)
                #Reading the header
                if n == 0:
                    header = info
                    n = n + 1
                #For the rest of the file
                else:
                    single = {}
                    i = 0
                    for col in header:
                        single[col] = info[i]
                        i = i + 1
                    self._fLoader.pushDict(collection, key, single)
        except Exception, e:
            print e
        return collection
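#Usage sketch (hypothetical file name): assuming a semicolon-separated csv
#laid out like the all-users.csv written by get_all_accounts below, where
#screen_name is the fourth column (index 3), loadCSV returns a dict mapping
#each screen_name to a list of row dictionaries keyed by the header columns.
def _example_loadCSV():
    csv_util = CSVUtil()
    accounts = csv_util.loadCSV("all-users.csv", keyIndex=3)
    for screen_name in accounts.keys():
        #Each value is a list because one key may label more than one row
        for row in accounts[screen_name]:
            print screen_name, row["group"], row["subgroup"]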
def get_list(screen_name, query):
    """
    #@Arguments:
    #screen_name = a string with the user screen name
    #query = string deciding which kind of list must be retrieved, i.e.: followers, friends, etc.
    #Return: list of ids for the given query, or an empty list in case it is
    #not possible, i.e.: a protected account
    """
    global CURSOR
    print 'Collecting list of: '+screen_name+"'s "+query
    #Marking the current user
    current_user = file_loader.loadPath(log_path+"status.txt", 'w')
    current_user.write(screen_name)
    current_user.close()
    _list = []
    while CURSOR != "0":
        url = "https://api.twitter.com/1.1/"+query+"/ids.json?cursor="
        url += CURSOR+"&screen_name="+screen_name+"&count=5000"
        info = CREDENTIALS[CREDENTIAL_INDEX].request(url)
        data = check_limit(info, screen_name, 'list')
        if data is not False:
            if "ids" not in data:
                print "Something went wrong, data = ", data
                break
            _list = _list + data["ids"]
            #Advancing to the next page; next_cursor_str is part of the
            #Twitter cursoring response and is "0" on the last page
            CURSOR = data["next_cursor_str"]
        else:
            print 'Some problem with data, URL = ', url
            break
    #Resetting the global cursor for the next call
    CURSOR = "-1"
    return _list
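#Usage sketch (hypothetical screen name): the endpoint returns at most 5000
#ids per cursored page, so get_list may issue several requests for a large
#account; passing "friends" instead of "followers" fetches followees.
def _example_get_list():
    follower_ids = get_list("some_account", "followers")
    print "Collected", len(follower_ids), "follower ids"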
def get_all_accounts(csv_path, keyIndex):
    """
    #@Arguments:
    #csv_path = path string for the .csv file which contains all accounts information
    #keyIndex = integer representing the index position of screen_name in the csv file
    #Return: None, only writes information on file
    """
    pu.set_unicode()
    accounts = file_loader.loadCSV(csv_path, keyIndex)
    header = "group;subgroup;name;screen_name;lang;twitter_id;"
    header += "followers_count;listed_count;statuses_count;friends_count;favourites_count;"
    header += "url;created_at;time_zone\n"
    #Creating the files which will hold the information about all accounts and
    #the paths of the per-user files which will contain their followers
    f_index = file_loader.loadPath(_output_path+'index.txt')
    f_csv = file_loader.loadPath(_output_path+'all-users.csv')
    f_csv.write(header)
    total = len(accounts)
    i = 0
    #Iterating over the dictionary with all accounts
    for screen_name in accounts.keys():
        p = Person()
        #If the user is labeled in more than one group/subgroup, only the last one is kept
        user = accounts[screen_name].pop()
        p_path = _output_path+user['group']+'/'+user['subgroup']+'/'+screen_name+".txt"
        info = get_user_info(screen_name)
        #If the twitter account exists (the user data is at info[0]):
        if info is not False:
            f_index.write(p_path+'\n')
            save_user_info(p, info[0], user['group'], user['subgroup'], header, p_path)
            f_csv.write(str(p))
        i = i + 1
        #Printing the progress on the screen
        print 'Status: ', str(round(100*float(i)/total, 2))+'% completed'
    f_index.close()
    f_csv.close()
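#Usage sketch (hypothetical path): expects a semicolon-separated csv with
#screen_name in column 3, matching the header written above; the per-user
#files are created under _output_path/<group>/<subgroup>/.
def _example_get_all_accounts():
    get_all_accounts("accounts.csv", 3)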
def save_user_info(p, info, group, subgroup, header, p_path):
    """
    #@Arguments:
    #p = person object
    #info = a list with the user data
    #group = string with the user group
    #subgroup = string with the user subgroup
    #header = string containing the columns for the csv file
    #p_path = path to the file on which to write this user's info
    #Return: nothing, just writes info on file
    """
    p.loadFromJSON(info, group, subgroup)
    #Creates a file for each account; this file will later contain all of this user's followers
    f_p = file_loader.loadPath(p_path)
    f_p.write(header)
    f_p.write(str(p))
    #Logging by file path, which embeds the user's screen name
    log.note("Retrieved: "+p_path)
    f_p.close()
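#Usage sketch (hypothetical labels and path): demonstrates the corrected
#six-argument signature, matching the call made from get_all_accounts above;
#the abbreviated header here is illustrative only.
def _example_save_user_info():
    p = Person()
    info = get_user_info("some_account")
    header = "group;subgroup;name;screen_name\n"
    if info is not False:
        save_user_info(p, info[0], "some_group", "some_subgroup", header, "some_account.txt")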