Exemple #1
0
def send_all(f, sender):
    """Send every item read from ``f`` whose ``sp_type`` equals 1.

    Each line of ``f`` is converted with ``itemLine2Dict`` and then
    ``csv2bin``.  Lines for which ``itemLine2Dict`` returns a falsy value and
    items whose ``sp_type`` is not 1 are skipped; the rest are passed to
    ``sender.send_json``.

    After every 10000 sent items the time taken by that batch is printed and
    the function sleeps for 2 seconds; after every 100000 sent items the
    running total and average rate are printed as well.

    Any ``Exception`` raised while processing stops the loop and is printed
    instead of being propagated (best-effort delivery).

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``None``.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1

            # Report only right after ``count`` advanced.  Checking on every
            # line would re-trigger the report (and the sleep) for each
            # skipped line while ``count`` sits at 0 or a multiple of 10000.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # Guard against a zero interval on coarse clocks.
                avg = count / elapsed if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            time.sleep(2)  # throttle between batches
            ts = te
    except Exception as err:
        # Best-effort: report the real error type and message, then stop.
        print('%s %s' % (type(err).__name__, err))
def send_all(f, sender):
    """Send every parsable item read from ``f`` and report throughput.

    Each line of ``f`` is converted with ``itemLine2Dict``; lines for which it
    returns a falsy value are skipped.  The remaining items are converted
    with ``csv2bin`` and passed to ``sender.send_json``.

    After every 10000 sent items the time taken by that batch is printed;
    after every 100000 sent items the running total and average rate are
    printed as well.

    Any ``Exception`` raised while processing stops the loop and is printed;
    the count reached so far is still returned.

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``(count, total_cost)`` - number of items sent and the elapsed
        wall-clock seconds.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            sender.send_json(csv2bin(weibo_item))
            count += 1

            # Report only when ``count`` has just reached a multiple of
            # 10000; skipped lines must not re-trigger the report.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # Guard against a zero interval on coarse clocks.
                avg = count / elapsed if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            ts = te
    except Exception as err:
        # Keep the best-effort behaviour but say what went wrong; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        print('error: %s: %s' % (type(err).__name__, err))

    total_cost = time.time() - tb
    return count, total_cost
def csv2reposts():
    """Record reposts found in the files under ``./csv/`` in ``weibo_repost_bucket``.

    Every line of every file is converted with ``itemLine2Dict``.  When the
    resulting item has a truthy ``retweeted_mid``, the string
    ``"<_id>_<user>_<timestamp>"`` is appended to the JSON list stored in
    ``weibo_repost_bucket`` under ``str(retweeted_mid)``; the list is created
    if the bucket raises ``KeyError`` for that key.

    A running line count and the seconds taken by the last 10000 lines are
    printed every 10000 lines.
    """
    path = './csv/'
    files = os.listdir(path)
    print('files count:  %s' % len(files))
    count = 0
    ts = time.time()
    for fname in files:
        # ``with`` closes the file even if processing a line raises.
        with open(path + fname, 'r') as f:
            for line in f:
                count += 1
                if count % 10000 == 0:
                    te = time.time()
                    print('%s %s sec' % (count, te - ts))
                    ts = te

                itemdict = itemLine2Dict(line)
                if not itemdict or not itemdict['retweeted_mid']:
                    continue

                key = str(itemdict['retweeted_mid'])
                entry = '_'.join([
                    str(itemdict['_id']),
                    str(itemdict['user']),
                    str(itemdict['timestamp']),
                ])
                try:
                    reposts = json.loads(weibo_repost_bucket.Get(key))
                except KeyError:
                    # No reposts stored for this key yet.
                    reposts = []
                reposts.append(entry)

                weibo_repost_bucket.Put(key, json.dumps(reposts))
def send_all(f, sender):
    """Send every item read from ``f`` whose ``sp_type`` equals 1.

    Each line of ``f`` is converted with ``itemLine2Dict`` and then
    ``csv2bin``.  Lines for which ``itemLine2Dict`` returns a falsy value and
    items whose ``sp_type`` is not 1 are skipped; the rest are passed to
    ``sender.send_json``.

    After every 10000 sent items the time taken by that batch is printed and
    the function sleeps for 1 second.

    Any ``Exception`` raised while processing stops the loop and is printed;
    the count reached so far is still returned.

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``(count, total_cost)`` - number of items sent and the elapsed
        wall-clock seconds.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1

            # Only report (and pause) when ``count`` has just reached a
            # multiple of 10000; otherwise every skipped line seen while the
            # count is stalled at 0 or 10000*k would cost a 1-second sleep.
            if count % 10000 == 0:
                te = time.time()
                print('[%s] read csv speed: %s sec/per %s' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
                ts = te
                time.sleep(1)  # throttle between batches
    except Exception as err:
        # Keep the best-effort behaviour but say what went wrong; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        print('send_all stopped early: %s: %s' % (type(err).__name__, err))

    total_cost = time.time() - tb
    return count, total_cost
def send_all(f, sender):
    """Send every item read from ``f`` whose ``sp_type`` equals 1.

    Each line of ``f`` is converted with ``itemLine2Dict`` and then
    ``csv2bin``.  Lines for which ``itemLine2Dict`` returns a falsy value and
    items whose ``sp_type`` is not 1 are skipped; the rest are passed to
    ``sender.send_json``.

    After every 10000 sent items two lines are printed: the time taken by
    that batch, and the running total with the average rate.

    Any ``Exception`` raised while processing stops the loop and is printed;
    the count reached so far is still returned.

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``(count, total_cost)`` - number of items sent and the elapsed
        wall-clock seconds.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1

            # Report only when ``count`` has just reached a multiple of
            # 10000; skipped lines must not re-trigger the report.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            elapsed = te - tb
            # Guard against a zero interval on coarse clocks.
            avg = count / elapsed if elapsed > 0 else 0.0
            print('[%s] read csv speed: %s sec/per %s' % (now, te - ts, 10000))
            # Report ``count``: it is the only counter this function
            # advances (the former ``count_send`` was never incremented, so
            # the total always read 0).
            print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (
                now, count, elapsed, avg))
            ts = te
    except Exception as err:
        # Keep the best-effort behaviour but say what went wrong; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        print('send_all stopped early: %s: %s' % (type(err).__name__, err))

    total_cost = time.time() - tb
    return count, total_cost
def send_all(f, sender):
    """Batch items from ``f`` with ``sp_type == 1``, filter them and send them.

    Each line of ``f`` is converted with ``itemLine2Dict`` and then
    ``csv2bin``.  Lines for which ``itemLine2Dict`` returns a falsy value and
    items whose ``sp_type`` is not 1 are skipped.  Accepted items are
    collected in batches of 10000; each full batch (and the final partial
    one) is handed to ``filter_ad`` as ``[mid, utf-8 encoded text]`` pairs,
    and the result is passed to ``send_filter`` together with the batch, the
    running ``count_send`` and ``sender``.

    NOTE(review): ``send_filter`` is presumably responsible for the actual
    sending and returns the updated ``count_send`` - confirm against its
    definition.

    :param f: iterable of input lines (e.g. an open file).
    :param sender: passed through to ``send_filter``.
    :returns: ``(count_send, total_cost)`` - the last value returned by
        ``send_filter`` (0 if it was never called) and the elapsed
        wall-clock seconds.
    """
    count = 0
    count_send = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch
    weibo_list = []   # [mid, encoded text] pairs fed to filter_ad
    weibo_send = []   # the matching full items fed to send_filter

    for line in f:
        weibo_item = itemLine2Dict(line)
        if not weibo_item:
            continue
        weibo_item_bin = csv2bin(weibo_item)
        if int(weibo_item_bin['sp_type']) != 1:
            continue
        weibo_send.append(weibo_item_bin)
        weibo_list.append([weibo_item_bin['mid'], weibo_item_bin['text'].encode('utf-8')])
        count += 1

        # Flush only when the batch has just filled up.  Checking on every
        # line would call filter_ad/send_filter with empty batches for each
        # skipped line while ``count`` sits at 0 or a multiple of 10000.
        if count % 10000 != 0:
            continue

        results_set = filter_ad(weibo_list)
        count_send = send_filter(results_set, weibo_send, count_send, sender)
        weibo_list = []
        weibo_send = []
        te = time.time()
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        elapsed = te - tb
        # Guard against a zero interval on coarse clocks.
        avg = count / elapsed if elapsed > 0 else 0.0
        print('[%s] read csv speed: %s sec/per %s' % (now, te - ts, 10000))
        print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (
            now, count_send, elapsed, avg))
        ts = te

    # Flush the final, partially filled batch.
    if weibo_list:
        results_set = filter_ad(weibo_list)
        count_send = send_filter(results_set, weibo_send, count_send, sender)

    # Computed unconditionally: it used to be set only inside the ``if``
    # above, so the return raised UnboundLocalError when nothing was left
    # to flush.
    total_cost = time.time() - tb
    return count_send, total_cost
def send_all(f, sender):
    """Send every item read from ``f`` whose ``sp_type`` equals 1.

    Each line of ``f`` is converted with ``itemLine2Dict`` and then
    ``csv2bin``.  Lines for which ``itemLine2Dict`` returns a falsy value and
    items whose ``sp_type`` is not 1 are skipped; the rest are passed to
    ``sender.send_json``.

    After every 10000 sent items the time taken by that batch is printed and
    the function sleeps for 2 seconds; after every 100000 sent items the
    running total and average rate are printed as well.

    Any ``Exception`` raised while processing stops the loop and is printed
    instead of being propagated (best-effort delivery).

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``None``.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item batch

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin["sp_type"]) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1

            # Report only when ``count`` has just reached a multiple of
            # 10000.  Evaluating this on every line would repeat the report
            # and the 2-second sleep for each skipped line while the count
            # is stalled at 0 or at a multiple of 10000.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print("[%s] deliver speed: %s sec/per %s" % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # Guard against a zero interval on coarse clocks.
                avg = count / elapsed if elapsed > 0 else 0.0
                print(
                    "[%s] total deliver %s, cost %s sec [avg %s per/sec]"
                    % (now, count, elapsed, avg)
                )
            time.sleep(2)  # throttle between batches
            ts = te
    except Exception as err:
        # Best-effort: report the real error type and message, then stop.
        print("%s %s" % (type(err).__name__, err))
def send_all(f, sender):
    """Send every parsable item read from ``f`` and report throughput.

    Each line of ``f`` is converted with ``itemLine2Dict``; lines for which it
    returns a falsy value are skipped.  The remaining items are converted
    with ``csv2bin`` and passed to ``sender.send_json``.

    After every 10000 sent items the time taken by that batch is printed;
    after every 100000 sent items the running total and average rate are
    printed as well.

    Any ``Exception`` raised while processing stops the loop; it is printed
    rather than propagated (best-effort delivery).

    :param f: iterable of input lines (e.g. an open file).
    :param sender: object exposing ``send_json(item)``.
    :returns: ``None``.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-item batch
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            sender.send_json(csv2bin(weibo_item))
            count += 1

            # Report only when ``count`` has just reached a multiple of
            # 10000; skipped lines must not re-trigger the report.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # Guard against a zero interval on coarse clocks.
                avg = count / elapsed if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            ts = te
    except Exception as e:
        # Still best-effort, but no longer silent: a failure used to end the
        # run without any trace of what happened or how far it got.
        print('send_all stopped after %s items: %s: %s' % (
            count, type(e).__name__, e))
Exemple #9
0
        if count_n == 100:
            break
uid_text = file('uid_text.csv', 'wb')
writer = csv.writer(uid_text)
count = 0
count_f = 0

file_list = set(os.listdir(CSV_FILE_PATH))
print "total file is ", len(file_list)

for each in file_list:
    with open(os.path.join(CSV_FILE_PATH, each), 'rb') as f:
        try:
            for line in f:
                count_f += 1
                weibo_item = itemLine2Dict(line)
                if weibo_item:
                    weibo_item_bin = csv2bin(weibo_item)
                    if int(weibo_item_bin['sp_type']) != 1:
                        continue
                    if not str(weibo_item_bin['uid']) in uid_set:
                        continue
                    text = weibo_item_bin['text']
                    if weibo_item_bin['message_type'] == 1:
                        write_text = text
                    elif weibo_item_bin['message_type'] == 2:
                        temp = text.split('//@')[0].split(':')[1:]
                        write_text = ''.join(temp)
                    else:
                        continue
                    item = [str(weibo_item_bin['uid']), write_text]
 def csv_input_pre_func(item):
     """Convert ``item`` with ``itemLine2Dict`` and return the result."""
     return itemLine2Dict(item)
Exemple #11
0
 def csv_input_pre_func(item):
     """Pre-processing hook: pass ``item`` through ``itemLine2Dict``.

     Returns whatever ``itemLine2Dict`` returns for ``item``.
     """
     parsed = itemLine2Dict(item)
     return parsed