def csvize_deleted_unique(csv_filename, error_code=-1, exclude_error_code=False):

	nowdatetime = weibo_module.get_current_chinatime()

	deleted_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
	num_dead_posts = len(deleted_post_ids)

	#if we're not tracking any posts, get out of there
	if (num_dead_posts <= 0):
		return

	## OPEN A FILE
	with codecs.open(csv_filename, "wb", "utf-16") as wf:

		#write csv header
		csv_header = weibo_module.get_csv_header()
		wf.write(csv_header + "\n")
		#print csv_header

		#iterate through posts
		for this_post_id in deleted_post_ids:

			this_post = weibo_module.merge_deleted_from_new_old(this_post_id)

			csvline = weibo_module.make_csvline_from_post(this_post)

			#make each element in the array unicode
			csvline = map((lambda x: unicode(x)), csvline)

			#not csv, this is our delimiter now
			csvline = weibo_settings.delim.join(csvline)

			#print csvline
			wf.write(csvline + "\n")
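
#usage sketch (hedged): the output filename below is hypothetical; leaving
#error_code / exclude_error_code alone just uses the function's own defaults
csvize_deleted_unique("deleted_unique.csv")
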
def csvize_repost_timeline(csv_filename, type="deleted", error_code=-1, exclude_error_code=False):

	nowdatetime = weibo_module.get_current_chinatime()

	if type == "deleted":
		query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
	else:
		query_post_ids = weibo_module.get_all_postids()
#		query_post_ids = query_post_ids[:10] # limit to the first 10 ids - for debugging
		print query_post_ids

	num_query_posts = len(query_post_ids)

	#if we're not tracking any posts, get out of there
	if (num_query_posts <= 0):
		return

	## OPEN A FILE
	with codecs.open(csv_filename, "wb", "utf-16") as wf:

		#write csv header
		csv_header = weibo_module.get_csv_header()
		wf.write(csv_header + "\n")

		#iterate through posts
		for this_post_id in query_post_ids:

			print "\n==WRITING POST #=========", this_post_id

			# okay first we get the initial post
			this_post = weibo_module.merge_deleted_from_new_old(this_post_id)

			# and then we scan the rest 
			this_post_all_logs =  weibo_module.get_all_posts(this_post_id)

			# and then we amass a logline -- a csv file inside a csv file!
			this_log_line = []
			for this_log in this_post_all_logs:
				if 'post_repost_count' in this_log and this_log["post_repost_count"] is not None:
					this_log_line.append(str(this_log["post_repost_count"]))
					this_log_line.append(str(this_log["checked_at"]))
			this_log_line = weibo_settings.delim_log.join(this_log_line)

			#get csvline array
			csvline = weibo_module.make_csvline_from_post(this_post)

			#make each element in array unicode
			csvline = map((lambda x: unicode(x)), csvline)

			#join with delimiter
			csvline = weibo_settings.delim.join(csvline)

			#add logline
			csvline += weibo_settings.delim
			csvline += this_log_line

			#write  csvline
			wf.write(csvline + "\n")
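
#usage sketch (hedged): filenames are hypothetical; type="deleted" exports only
#posts returned by get_deleted_postids(), any other value falls through to
#get_all_postids() and exports every tracked post
csvize_repost_timeline("deleted_repost_timeline.csv", type="deleted")
csvize_repost_timeline("all_repost_timeline.csv", type="all")
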
def list_deleted_posts():

	deleted_post_ids = weibo_module.get_deleted_postids()

	print ""
	print "########## DEAD POSTS ##########"
	num_dead_posts = len(deleted_post_ids)
	print "# of dead posts we've tracked: " + str(num_dead_posts)
	if (num_dead_posts > 0):
		dead_post_ids = []

		for this_post_id in deleted_post_ids:
			print "Checking post # " + this_post_id , "::::: ",

			# get the post info from postids_live collection,
			# since if the post was deleted we wouldn't have any of that info anymore

			this_post_deleted = weibo_module.merge_deleted_from_new_old(this_post_id)
			print "ERRORCODE = " , this_post_deleted["error_code"]

			print "alive: new/old repost count (" , this_post_deleted["post_repost_count"] , " / " , this_post_deleted["post_repost_count_initial"] , ") "
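
#usage sketch: the report takes no arguments and prints straight to stdout
list_deleted_posts()
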
def jsonize_repost_timeline(json_filename, type="deleted", error_code=-1, exclude_error_code=False, do_obfuscate=False):

	nowdatetime = weibo_module.get_current_chinatime()

	if type == "deleted":
		query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
	else:
		query_post_ids = weibo_module.get_all_postids()
#		query_post_ids = query_post_ids[:10] # limit to the first 10 ids - for debugging

		print query_post_ids

	num_query_posts = len(query_post_ids)

	#if we're not tracking any posts, get out of there
	if (num_query_posts <= 0):
		return

	## OPEN A FILE
	with codecs.open(json_filename, "wb") as wf:

		wf.write("[ " + "\n")

		#iterate through posts
		postno = 0
		for this_post_id in query_post_ids:

			postno += 1
			print "\n==WRITING (", postno, " / ", num_query_posts, ") POST #=========", this_post_id

			# okay first we get the initial post
			this_post = weibo_module.merge_deleted_from_new_old(this_post_id)

			# and then we scan the rest 
			this_post_all_logs =  weibo_module.get_all_posts(this_post_id)

			# and then we amass a logline 
			this_log_list = []
			for this_log in this_post_all_logs:
				if 'post_repost_count' in this_log and this_log["post_repost_count"] is not None and this_log["checked_at"] is not None:
					this_pair_dict = {}
					this_pair_dict["checked_at"] = str(this_log["checked_at"])
					this_pair_dict["post_repost_count"] = int(this_log["post_repost_count"])

					this_log_list.append(this_pair_dict)

			#get jsonline array
			jsonline = weibo_module.make_jsonlist_from_post(this_post)

			#merge 
			jsonline['post_repost_log'] = this_log_list

			"""
			new_jsonline = {}

			#amass logline
			new_jsonline['post_repost_log'] = this_log_list

			#add other items
			new_jsonline['post_created_at'] = jsonline['post_created_at']
			new_jsonline['post_created_at_epoch'] = jsonline['post_created_at_epoch']
			new_jsonline['post_lifespan'] = jsonline['post_lifespan']
			new_jsonline['last_checked_at'] = jsonline['last_checked_at']
			new_jsonline['user_id'] = weibo_module.hashmod(jsonline['user_id'], weibo_settings.salt, weibo_settings.user_id_mod)
			new_jsonline['post_id'] = jsonline['post_id']
			new_jsonline['user_name'] = jsonline['user_name']
			new_jsonline['user_follower_count'] = jsonline['user_follower_count']
			new_jsonline['post_text'] = jsonline['post_text']
			new_jsonline['xxx'] = jsonline['xxx']
			new_jsonline['xxx'] = jsonline['xxx']
			new_jsonline['xxx'] = jsonline['xxx']
			#new_jsonline['post_id'] = weibo_module.hashmod(jsonline['post_id'], weibo_settings.salt, weibo_settings.post_id_mod)
			"""
			#wf.write(json.dumps(jsonline, ensure_ascii=False))
			wf.write(json.dumps(jsonline))
			#wf.write(json.dumps(new_jsonline))

			if postno != num_query_posts:
				wf.write(", ")

			wf.write("\n")

		wf.write(" ]" + "\n")
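
#usage sketch (hedged): the filename is hypothetical; dump the repost timeline
#of every tracked post as one JSON array, then re-read it to confirm it parses
jsonize_repost_timeline("repost_timeline.json", type="all")
with open("repost_timeline.json", "rb") as rf:
	timeline = json.load(rf)
print "parsed", len(timeline), "posts"
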
import codecs
import pymongo
import csv
import json
import datetime
from dateutil.parser import parse
import requests
import logging
import re
import os
import weibo_settings
import weibo_module

this_post_id = "3622155533096449"

this_post = weibo_module.merge_deleted_from_new_old(this_post_id)

csvline = (
    this_post["post_id"],
    this_post["user_id"],
    this_post["user_name"],
    this_post["user_follower_count_initial"],
    this_post["user_follower_count"],
    this_post["post_original_pic"],
    this_post["post_created_at"],
    this_post["post_repost_count_initial"],
    this_post["post_repost_count"],
    this_post["post_text"],
    this_post["started_tracking_at"],
    this_post["is_deleted"],
    this_post["is_retired"],
    this_post["error_message"],
    this_post["error_code"],
    this_post["last_checked_at"],
    this_post["post_lifespan"],
)

#make each element in the tuple unicode
csvline = map((lambda x: unicode(x)), csvline)
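
#join the unicode fields with the project delimiter, mirroring the csvize_*
#functions above, and write the single line out; the filename is hypothetical
csvline = weibo_settings.delim.join(csvline)

with codecs.open("single_post.csv", "wb", "utf-16") as wf:
    wf.write(csvline + "\n")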