def dump_to_csv(self, output_csv, input_fields): count = 0 tweet_parser = TweetParser() filehandle = open(output_csv, 'wb') writer = unicodecsv.writer(filehandle) expanded_fields = [] expanded_fields_list_keys = [] for field_path in input_fields: fields = field_path.split('.') if fields[-1].isdigit(): expanded_fields_list_keys.append((fields[0:len(fields)-1], fields[len(fields)-1])) if fields[0:len(fields)-1] not in expanded_fields: expanded_fields.append(fields[0:len(fields)-1]) else: expanded_fields.append(fields) for tweet in self.get_iterator(): #use json.loads and not json_util #to get a regular dict tweet = json.loads(json_util.dumps(tweet)) row_to_write = [] flat_tweet_list = [] # flatten each tweet, and put the resulting tuples # in a list for flat_entry in tweet_parser.flatten_dict(tweet): flat_tweet_list.append(flat_entry) # write a header if its the first # tweet if count == 0: writer.writerow(input_fields) count += 1 # if each flattened key path # is a path the user wants add # it to be a row to write for expanded_field in expanded_fields: for tweet_tuple in flat_tweet_list: if tweet_tuple[0] == expanded_field: if isinstance(tweet_tuple[1], list): # for each possible array index for list_key in expanded_fields_list_keys: if list_key[0] == tweet_tuple[0] and int(list_key[1]) < len(tweet_tuple[1]): row_to_write.append(json_util.dumps(tweet_tuple[1][int(list_key[1])])) else: row_to_write.append('None') else: if isinstance(tweet_tuple[1], str): row_to_write.append(tweet_tuple[1].encode('utf-8').decode('utf-8')) else: row_to_write.append(tweet_tuple[1]) #convert each thing to unicode writer.writerow(row_to_write) filehandle.close()