# Example 1
class CSVUtil(object):
  """Utility for loading ';'-delimited CSV files into keyed dictionaries."""

  def __init__(self):
    # File loader used to open paths and to accumulate rows per key.
    self._fLoader = PyFileLoader()

  def loadCSV(self, path, keyIndex=0):
    """Load a ';'-delimited CSV file into a dictionary keyed by one column.

    Arguments:
      path: path string for a csv file; a leading '/' and trailing
        newline are stripped before loading.
      keyIndex: integer column index whose value becomes the dictionary
        key; 0 (the first column) by default.

    Return:
      A dict mapping each key-column value to a list of row dictionaries
      (header name -> cell value) for that key. On error, the rows
      collected so far are returned and the error is printed
      (best-effort behaviour kept from the original).
    """
    path = path.lstrip("/").rstrip("\n")
    collection = {}
    try:
      data = self._fLoader.loadPath(path)
      header = None
      for row in data:
        info = row.rstrip("\n").split(";")
        # Remove the key column so it does not appear among the values.
        key = info.pop(keyIndex)
        if header is None:
          # First row is the header; its key-column cell was discarded
          # too, which keeps header and data columns aligned for any
          # keyIndex value.
          header = info
        else:
          self._fLoader.pushDict(collection, key, dict(zip(header, info)))
    except Exception as e:
      # Original used the Python 2-only `except Exception, e` / `print e`
      # forms, which are syntax errors in Python 3.
      print(e)
    return collection
# Example 2
def get_list(screen_name, query):
    """Collect the full id list for a user by paging through Twitter cursors.

    Arguments:
      screen_name: a string with the user screen name.
      query: which kind of list to retrieve, e.g. 'followers' or 'friends'.

    Return:
      List of ids for the given query, or the ids gathered so far when a
      page cannot be fetched (e.g. a protected account).
    """
    # CURSOR is module-level state shared across calls. Without this
    # declaration, the assignment at the bottom made CURSOR local to the
    # function and the `while` below raised UnboundLocalError.
    global CURSOR
    print('Collect list of:'+screen_name+"'s "+query)
    # Marking the current user so an interrupted run can be resumed.
    current_user = file_loader.loadPath(log_path+"status.txt", 'w')
    current_user.write(screen_name)
    current_user.close()
    _list = []
    while CURSOR != "0":
        url = "https://api.twitter.com/1.1/"+query+"/ids.json?cursor="
        url += CURSOR+"&screen_name="+screen_name+"&count=5000"
        info = CREDENTIALS[CREDENTIAL_INDEX].request(url)
        data = check_limit(info, screen_name, 'list')
        if data is not False:
            if "ids" not in data:
                # Original printed this and then crashed on data["ids"].
                print("Something went wrong, data = ", data)
                break
            _list = _list + data["ids"]
            # Advance to the next page; the Twitter cursoring API returns
            # "0" on the last page. The original never updated CURSOR
            # here, so the loop re-fetched the same page forever.
            CURSOR = data["next_cursor_str"]
        else:
            print('Some problem with data, URL = ', url)
            # Stop instead of retrying the same cursor indefinitely.
            break
    CURSOR = "-1"
    return _list
# Example 3
def get_all_accounts(csv_path, keyIndex):
    """Fetch Twitter info for every labelled account and write summary files.

    Arguments:
      csv_path: path string for the .csv file containing all accounts
        information.
      keyIndex: integer index of the screen_name column in the csv file.

    Return:
      None; writes an index file and an all-users csv under _output_path.
    """
    pu.set_unicode()
    # This load was commented out in the original, which left 'accounts'
    # undefined and made the function fail with NameError.
    accounts = file_loader.loadCSV(csv_path, keyIndex)
    header = "group;subgroup;name;screen_name;lang;twitter_id;"
    header += "followers_count;listed_count;statuses_count;friends_count;favourites_count;"
    header += "url;created_at;time_zone\n"
    # Files holding every account's row and the per-account follower paths.
    f_index = file_loader.loadPath(_output_path+'index.txt')
    f_csv = file_loader.loadPath(_output_path+'all-users.csv')
    f_csv.write(header)
    total = len(accounts)
    # Iterating over the dictionary with all accounts.
    for i, screen_name in enumerate(accounts.keys(), start=1):
        p = Person()
        # A user labelled in more than one group/subgroup keeps the last one.
        user = accounts[screen_name].pop()
        p_path = _output_path+user['group']+'/'+user['subgroup']+'/'+screen_name+".txt"
        info = get_user_info(screen_name)
        # If the twitter account exists (PS: user data is at info[0]):
        if info is not False:
            f_index.write(p_path+'\n')
            save_user_info(p, info[0], user['group'], user['subgroup'], header, p_path)
            f_csv.write(str(p))
        # Printing the progress on the screen.
        print('Status: ', str(round(100*float(i)/total, 2))+'% completed')
    f_index.close()
    f_csv.close()
# Example 4
def save_user_info(p, info, group, subgroup, header, p_path):
    """Populate a Person from raw user data and write it to its own file.

    Arguments:
      p: person object.
      info: a list with the user's data.
      group: string with the user group.
      subgroup: string with the user subgroup.
      header: string containing the columns for the csv file.
      p_path: path of the file to write this user's info to.

    Return:
      None, just writes info on file.
    """
    # In the original, this call had slipped inside the docstring (so it
    # never ran) and the signature lacked group/subgroup even though the
    # caller get_all_accounts passes them as separate arguments - both
    # restored here.
    p.loadFromJSON(info, group, subgroup)
    # Creates a file for each account; it will contain this user's followers.
    f_p = file_loader.loadPath(p_path)
    f_p.write(header)
    f_p.write(str(p))
    # NOTE(review): the original logged a now-removed 'user' argument; the
    # file path carries the screen name, so it is logged instead.
    log.note("Retrieved: " + p_path)
    f_p.close()