def send_all(f, sender):
    """Parse every line of ``f`` and deliver the resulting items via ``sender``.

    Each line is converted with ``itemLine2Dict``; lines that yield a falsy
    result are skipped.  Accepted items are passed through ``csv2bin`` and
    handed to ``sender.send_json``.  A throughput line is printed after every
    10000 delivered items and a cumulative summary after every 100000.

    Any ``Exception`` raised while reading, converting or sending stops the
    loop and is reported on stdout; the counters gathered so far are still
    returned.  ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        tuple: ``(count, total_cost)`` - number of items sent and the
        wall-clock seconds spent in this call.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            sender.send_json(csv2bin(weibo_item))
            count += 1

            # Report only right after ``count`` changed.  Checking on every
            # input line would repeat the report for each skipped line while
            # ``count`` sits on a multiple of 10000 (including 0).
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # A coarse clock can report zero elapsed time.
                avg = (count / elapsed) if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            ts = te
    except Exception as exc:
        # Best effort: keep the partial result, but say what went wrong.
        print('error: %r' % (exc,))
    total_cost = time.time() - tb
    return count, total_cost
def send_all(f, sender):
    """Deliver the items of ``f`` whose ``sp_type`` equals 1 via ``sender``.

    Each line is converted with ``itemLine2Dict`` (falsy results are
    skipped) and then with ``csv2bin``.  Items whose ``int(item['sp_type'])``
    is not 1 are dropped; the rest go to ``sender.send_json``.  After every
    10000 delivered items a throughput line is printed and the function
    sleeps for one second.

    Any ``Exception`` raised along the way stops the loop and is reported on
    stdout; the counters gathered so far are still returned.
    ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        tuple: ``(count, total_cost)`` - number of items sent and the
        wall-clock seconds spent in this call.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue

            sender.send_json(weibo_item_bin)
            count += 1

            # Report (and pause) only right after ``count`` changed;
            # otherwise every unparseable line seen while ``count`` is a
            # multiple of 10000 (including 0) would cost a one-second sleep.
            if count % 10000 != 0:
                continue

            te = time.time()
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print('[%s] read csv speed: %s sec/per %s' % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
            ts = te
            time.sleep(1)
    except Exception as exc:
        # Best effort: keep the partial result, but say what went wrong.
        print('pass: %r' % (exc,))
    total_cost = time.time() - tb
    return count, total_cost
Beispiel #3
0
def send_all(f, sender):
    """Deliver the items of ``f`` whose ``sp_type`` equals 1 via ``sender``.

    Each line is converted with ``itemLine2Dict`` (falsy results are
    skipped) and then with ``csv2bin``.  Items whose ``int(item['sp_type'])``
    is not 1 are dropped; the rest go to ``sender.send_json``.  After every
    10000 delivered items a throughput line is printed (plus a cumulative
    summary every 100000) and the function sleeps for two seconds.

    Any ``Exception`` raised along the way stops the loop and is reported on
    stdout.  ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        None.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue

            sender.send_json(weibo_item_bin)
            count += 1

            # Report (and pause) only right after ``count`` changed;
            # otherwise every unparseable line seen while ``count`` is a
            # multiple of 10000 (including 0) would cost a two-second sleep.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # A coarse clock can report zero elapsed time.
                avg = (count / elapsed) if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            time.sleep(2)
            ts = te
    except Exception as r:
        # Name the concrete exception type instead of the base class.
        print('%s: %s' % (type(r).__name__, r))
def send_all(f, sender):
    """Deliver the items of ``f`` whose ``sp_type`` equals 1 via ``sender``.

    Each line is converted with ``itemLine2Dict`` (falsy results are
    skipped) and then with ``csv2bin``.  Items whose ``int(item['sp_type'])``
    is not 1 are dropped; the rest go to ``sender.send_json``.  After every
    10000 delivered items a throughput line and a cumulative summary are
    printed.

    Any ``Exception`` raised along the way stops the loop and is reported on
    stdout; the counters gathered so far are still returned.
    ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        tuple: ``(count, total_cost)`` - number of items sent and the
        wall-clock seconds spent in this call.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue

            sender.send_json(weibo_item_bin)
            count += 1

            # Report only right after ``count`` changed, so skipped lines do
            # not repeat the report while ``count`` is a multiple of 10000.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            elapsed = te - tb
            # A coarse clock can report zero elapsed time.
            avg = (count / elapsed) if elapsed > 0 else 0.0
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print('[%s] read csv speed: %s sec/per %s' % (now, te - ts, 10000))
            # Report the number of items actually sent; the previous separate
            # "sent" counter was never incremented and always showed 0.
            print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (
                now, count, elapsed, avg))
            ts = te
    except Exception as exc:
        # Best effort: keep the partial result, but say what went wrong.
        print('pass: %r' % (exc,))
    total_cost = time.time() - tb
    return count, total_cost
Beispiel #5
0
def send_all(f, sender):
    """Read items from ``f``, batch them, and send each batch through a filter.

    Each line is converted with ``itemLine2Dict`` (falsy results are
    skipped) and then with ``csv2bin``.  Items whose ``int(item['sp_type'])``
    is not 1 are dropped.  Accepted items are buffered; every 10000 accepted
    items the buffered ``[mid, utf-8 encoded text]`` pairs are passed to
    ``filter_ad`` and its result, together with the buffered items, is passed
    to ``send_filter``, whose return value becomes the new running
    ``count_send``.  A final partial batch is flushed after the loop.

    Exceptions are not handled here and propagate to the caller.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: forwarded unchanged to ``send_filter``.

    Returns:
        tuple: ``(count_send, total_cost)`` - the last value returned by
        ``send_filter`` (0 if it was never called) and the wall-clock
        seconds spent in this call.
    """
    count = 0  # items accepted (read and passing the sp_type check)
    count_send = 0  # running total maintained by send_filter
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window
    weibo_list = []  # [mid, encoded text] pairs handed to filter_ad
    weibo_send = []  # full items handed to send_filter

    for line in f:
        weibo_item = itemLine2Dict(line)
        if not weibo_item:
            continue

        weibo_item_bin = csv2bin(weibo_item)
        if int(weibo_item_bin['sp_type']) != 1:
            continue

        weibo_send.append(weibo_item_bin)
        weibo_list.append([weibo_item_bin['mid'], weibo_item_bin['text'].encode('utf-8')])
        count += 1

        # Flush only right after ``count`` changed.  Checking on every input
        # line would flush an empty batch for each unparseable line seen
        # while ``count`` is a multiple of 10000 (including 0).
        if count % 10000 != 0:
            continue

        results_set = filter_ad(weibo_list)
        count_send = send_filter(results_set, weibo_send, count_send, sender)
        weibo_list = []
        weibo_send = []

        te = time.time()
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        elapsed = te - tb
        # A coarse clock can report zero elapsed time.
        avg = (count / elapsed) if elapsed > 0 else 0.0
        # Single-argument parenthesised print produces the same output under
        # the Python 2 print statement and the Python 3 function.
        print('[%s] read csv speed: %s sec/per %s' % (now, te - ts, 10000))
        print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (
            now, count_send, elapsed, avg))
        ts = te

    # Flush whatever is left over from the last, incomplete batch.
    if weibo_list:
        results_set = filter_ad(weibo_list)
        count_send = send_filter(results_set, weibo_send, count_send, sender)

    # Computed unconditionally: it used to be set only inside the branch
    # above, leaving it undefined whenever the final batch was empty.
    total_cost = time.time() - tb
    return count_send, total_cost
def send_all(f, sender):
    """Deliver the items of ``f`` whose ``sp_type`` equals 1 via ``sender``.

    Each line is converted with ``itemLine2Dict`` (falsy results are
    skipped) and then with ``csv2bin``.  Items whose ``int(item["sp_type"])``
    is not 1 are dropped; the rest go to ``sender.send_json``.  After every
    10000 delivered items a throughput line is printed (plus a cumulative
    summary every 100000) and the function sleeps for two seconds.

    Any ``Exception`` raised along the way stops the loop and is reported on
    stdout.  ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        None.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window

    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin["sp_type"]) != 1:
                continue

            sender.send_json(weibo_item_bin)
            count += 1

            # Report (and pause) only right after ``count`` changed;
            # otherwise every unparseable line seen while ``count`` is a
            # multiple of 10000 (including 0) would cost a two-second sleep.
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print("[%s] deliver speed: %s sec/per %s" % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # A coarse clock can report zero elapsed time.
                avg = (count / elapsed) if elapsed > 0 else 0.0
                print("[%s] total deliver %s, cost %s sec [avg %s per/sec]" % (
                    now,
                    count,
                    elapsed,
                    avg,
                ))
            time.sleep(2)
            ts = te
    except Exception as r:
        # Name the concrete exception type instead of the base class.
        print("%s: %s" % (type(r).__name__, r))
def send_all(f, sender):
    """Parse every line of ``f`` and deliver the resulting items via ``sender``.

    Each line is converted with ``itemLine2Dict``; lines that yield a falsy
    result are skipped.  Accepted items are passed through ``csv2bin`` and
    handed to ``sender.send_json``.  A throughput line is printed after every
    10000 delivered items and a cumulative summary after every 100000.

    Any ``Exception`` raised along the way stops the loop and is reported on
    stdout instead of being discarded without a trace.
    ``KeyboardInterrupt`` / ``SystemExit`` are not intercepted.

    Args:
        f: iterable of raw input lines (e.g. an open file).
        sender: object exposing ``send_json(item)``.

    Returns:
        None.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb  # start of the current 10000-item window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue

            sender.send_json(csv2bin(weibo_item))
            count += 1

            # Report only right after ``count`` changed, so skipped lines do
            # not repeat the report while ``count`` is a multiple of 10000
            # (including 0).
            if count % 10000 != 0:
                continue

            te = time.time()
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # Single-argument parenthesised print produces the same output
            # under the Python 2 print statement and the Python 3 function.
            print('[%s] deliver speed: %s sec/per %s' % (now, te - ts, 10000))
            if count % 100000 == 0:
                elapsed = te - tb
                # A coarse clock can report zero elapsed time.
                avg = (count / elapsed) if elapsed > 0 else 0.0
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    now, count, elapsed, avg))
            ts = te
    except Exception as e:
        # Still best effort (no re-raise), but leave evidence of the failure.
        print('send_all aborted after %s items: %r' % (count, e))
Beispiel #8
0
uid_text = file('uid_text.csv', 'wb')
writer = csv.writer(uid_text)
count = 0
count_f = 0

file_list = set(os.listdir(CSV_FILE_PATH))
print "total file is ", len(file_list)

for each in file_list:
    with open(os.path.join(CSV_FILE_PATH, each), 'rb') as f:
        try:
            for line in f:
                count_f += 1
                weibo_item = itemLine2Dict(line)
                if weibo_item:
                    weibo_item_bin = csv2bin(weibo_item)
                    if int(weibo_item_bin['sp_type']) != 1:
                        continue
                    if not str(weibo_item_bin['uid']) in uid_set:
                        continue
                    text = weibo_item_bin['text']
                    if weibo_item_bin['message_type'] == 1:
                        write_text = text
                    elif weibo_item_bin['message_type'] == 2:
                        temp = text.split('//@')[0].split(':')[1:]
                        write_text = ''.join(temp)
                    else:
                        continue
                    item = [str(weibo_item_bin['uid']), write_text]

                    if write_text != "":