# Export repost-count timelines for tracked posts: one record per post, with
# the full history of (repost_count, checked_at) snapshots attached.

import codecs
import json

import weibo_module
import weibo_settings


def csvize_repost_timeline(csv_filename, type="deleted", error_code=-1, exclude_error_code=False):
    # type="deleted" exports only posts whose last fetch came back with an
    # error (filtered by error_code / exclude_error_code); any other value
    # exports every tracked post
    nowdatetime = weibo_module.get_current_chinatime()  # currently unused
    if type == "deleted":
        query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
    else:
        query_post_ids = weibo_module.get_all_postids()
    # query_post_ids = query_post_ids[:10]  # limit to the first 10 ids - for debugging
    print query_post_ids
    num_query_posts = len(query_post_ids)

    # if we're not tracking any posts, get out of here
    if num_query_posts <= 0:
        return

    ## OPEN A FILE
    with codecs.open(csv_filename, "wb", "utf-16") as wf:
        # write csv header
        csv_header = weibo_module.get_csv_header()
        wf.write(csv_header + "\n")

        # iterate through posts
        for this_post_id in query_post_ids:
            print "\n==WRITING POST #=========", this_post_id
            # first merge the initial post record (new + old tables)
            this_post = weibo_module.merge_deleted_from_new_old(this_post_id)
            # then pull every logged snapshot of this post
            this_post_all_logs = weibo_module.get_all_posts(this_post_id)

            # amass a logline -- a csv file inside a csv file! alternating
            # repost_count / checked_at values joined with delim_log
            this_log_line = []
            for this_log in this_post_all_logs:
                if 'post_repost_count' in this_log and this_log["post_repost_count"] is not None:
                    this_log_line.append(str(this_log["post_repost_count"]))
                    this_log_line.append(str(this_log["checked_at"]))
            this_log_line = weibo_settings.delim_log.join(this_log_line)

            # get csvline array
            csvline = weibo_module.make_csvline_from_post(this_post)
            # make each element in the array unicode
            csvline = map(lambda x: unicode(x), csvline)
            # join with the main delimiter
            csvline = weibo_settings.delim.join(csvline)
            # append the logline as the final column
            csvline += weibo_settings.delim
            csvline += this_log_line
            # write csvline
            wf.write(csvline + "\n")
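
# A minimal sketch, not part of the original module: reading the packed
# repost-count history back out of one CSV row written above. It only relies
# on what csvize_repost_timeline guarantees -- the last column is alternating
# repost_count / checked_at values joined with weibo_settings.delim_log.
# The helper name and return shape are this sketch's own choices.
def parse_repost_log_field(log_field):
    parts = log_field.split(weibo_settings.delim_log)
    # re-pair the values in the order they were written: count, checked_at, ...
    return [(int(parts[i]), parts[i + 1]) for i in xrange(0, len(parts) - 1, 2)]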
def jsonize_repost_timeline(json_filename, type="deleted", error_code=-1, exclude_error_code=False, do_obfuscate=False):
    # note: do_obfuscate is accepted but not used yet; the commented-out
    # new_jsonline block below sketches the obfuscated output it was meant for
    nowdatetime = weibo_module.get_current_chinatime()  # currently unused
    if type == "deleted":
        query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
    else:
        query_post_ids = weibo_module.get_all_postids()
    # query_post_ids = query_post_ids[:10]  # limit to the first 10 ids - for debugging
    print query_post_ids
    num_query_posts = len(query_post_ids)

    # if we're not tracking any posts, get out of here
    if num_query_posts <= 0:
        return

    ## OPEN A FILE
    with codecs.open(json_filename, "wb") as wf:
        wf.write("[ " + "\n")

        # iterate through posts
        postno = 0
        for this_post_id in query_post_ids:
            postno += 1
            print "\n==WRITING (", postno, " / ", num_query_posts, ") POST #=========", this_post_id
            # first merge the initial post record (new + old tables)
            this_post = weibo_module.merge_deleted_from_new_old(this_post_id)
            # then pull every logged snapshot of this post
            this_post_all_logs = weibo_module.get_all_posts(this_post_id)

            # amass the log: one {checked_at, post_repost_count} dict per snapshot
            this_log_list = []
            for this_log in this_post_all_logs:
                if 'post_repost_count' in this_log and this_log["post_repost_count"] is not None and this_log["checked_at"] is not None:
                    this_pair_dict = {}
                    this_pair_dict["checked_at"] = str(this_log["checked_at"])
                    this_pair_dict["post_repost_count"] = int(this_log["post_repost_count"])
                    this_log_list.append(this_pair_dict)

            # get the jsonline dict for this post
            jsonline = weibo_module.make_jsonlist_from_post(this_post)
            # merge in the repost log
            jsonline['post_repost_log'] = this_log_list

            """
            new_jsonline = {}
            #amass logline
            new_jsonline['post_repost_log'] = this_log_list
            #add other items
            new_jsonline['post_created_at'] = jsonline['post_created_at']
            new_jsonline['post_created_at_epoch'] = jsonline['post_created_at_epoch']
            new_jsonline['post_lifespan'] = jsonline['post_lifespan']
            new_jsonline['last_checked_at'] = jsonline['last_checked_at']
            new_jsonline['user_id'] = weibo_module.hashmod(jsonline['user_id'], weibo_settings.salt, weibo_settings.user_id_mod)
            new_jsonline['post_id'] = jsonline['post_id']
            new_jsonline['user_name'] = jsonline['user_name']
            new_jsonline['user_follower_count'] = jsonline['user_follower_count']
            new_jsonline['post_text'] = jsonline['post_text']
            new_jsonline['xxx'] = jsonline['xxx']
            #new_jsonline['post_id'] = weibo_module.hashmod(jsonline['post_id'], weibo_settings.salt, weibo_settings.post_id_mod)
            """

            #wf.write(json.dumps(jsonline, ensure_ascii=False))
            wf.write(json.dumps(jsonline))
            #wf.write(json.dumps(new_jsonline))

            # hand-rolled JSON array: comma after every record except the last
            if postno != num_query_posts:
                wf.write(", ")
            wf.write("\n")

        wf.write(" ]" + "\n")
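
# Usage sketch -- not in the original file, and the filenames are made up.
# Both exporters assume weibo_module can already reach the tracking database;
# type="deleted" restricts the export to posts whose last check returned an
# error, while any other value exports every tracked post.
if __name__ == "__main__":
    # CSV of repost timelines for posts that now come back deleted
    csvize_repost_timeline("deleted_repost_timelines.csv", type="deleted")
    # JSON array of the same timelines for every tracked post
    jsonize_repost_timeline("all_repost_timelines.json", type="all")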