コード例 #1
0
 def singleprocess_datanum(self, test_dict):
     """Time the single-process similarity pipeline for several input sizes.

     Each key of ``test_dict`` is used as the number of leading items taken
     from ``get_content_list()``.  The value stored under that key is
     overwritten with the seconds spent in ``get_jd_list`` followed by
     ``get_reviews_similarity_relation``; building the 2-grams is not timed.

     Returns the same ``test_dict`` object, updated in place.
     """
     reader = file_util.FileUtil('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
     reader.get_structure()
     # Snapshot the keys first: the values are rewritten while we loop.
     for sample_size in list(test_dict):
         print(sample_size)
         subset = reader.get_content_list()[:sample_size]
         bigrams = summary_plot.get_2_grams_list(subset)
         began = time.time()
         summary_plot.get_reviews_similarity_relation(
             summary_plot.get_jd_list(bigrams))
         test_dict[sample_size] = time.time() - began
     return test_dict
コード例 #2
0
def draw_review_distance_multiprocess(list_num=-1, put_num=10000):
    """Feed every pair of review 2-grams to a pool of worker processes.

    The reviews are read from ``reviewsNew.mP``, converted with
    ``summary_plot.get_2_grams_list`` and every unordered pair
    ``[grams_i, grams_j]`` (i < j) is sent to the workers in batches.  Each
    worker runs ``write_review_distance_to_file(queue, lock, index,
    dirname='jaccard_distance_220000')`` and is told to finish by one
    ``'STOP'`` sentinel placed on the queue after the last batch.

    Args:
        list_num: number of leading reviews to use.  ``-1`` (the default) or
            ``None`` means all reviews; any other value is applied as the
            slice ``[0:list_num]``.
        put_num: maximum number of pairs per batch put on the queue; must be
            at least 1.

    Returns:
        Seconds elapsed from just before the workers are started until all
        of them have been joined.

    Raises:
        ValueError: if ``put_num`` is smaller than 1.
    """
    from itertools import combinations

    # Reject a bad batch size before any worker is started; otherwise the
    # workers would be left blocked on the queue when the producer fails.
    if put_num < 1:
        raise ValueError('put_num must be a positive integer, got %r' % (put_num,))

    # Always run at least one worker, even on a single-core machine.
    worker_count = max(1, cpu_count() // 2)
    # A bounded queue makes put() block while the workers are behind, so the
    # O(n^2) stream of pairs cannot pile up in memory.
    q = Queue(2 * worker_count)
    lock = Lock()

    fu = file_util.FileUtil()
    fu.open_file('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
    fu.get_structure()
    content_list = fu.get_content_list()
    # Slicing with the default -1 would silently drop the last review.
    if list_num is not None and list_num != -1:
        content_list = content_list[0:list_num]
    content_list_2_grams = summary_plot.get_2_grams_list(content_list)

    start = time.time()
    process_list = []
    for i in range(worker_count):
        p = Process(target=write_review_distance_to_file,
                    args=(q, lock, i),
                    kwargs={'dirname': 'jaccard_distance_220000'})
        p.start()
        process_list.append(p)

    print(len(content_list_2_grams))
    grams_pair_list = []
    for first, second in combinations(content_list_2_grams, 2):
        # Flush a full batch before adding the next pair.
        if len(grams_pair_list) == put_num:
            q.put(grams_pair_list)
            grams_pair_list = []
        grams_pair_list.append([first, second])
    # The remaining (possibly partial or empty) batch is always sent.
    q.put(grams_pair_list)

    # One sentinel per worker so that every worker terminates.
    for _ in range(worker_count):
        q.put('STOP')

    for p in process_list:
        p.join()
    finish_time = time.time() - start
    print('exit main with %s s' % finish_time)
    return finish_time
コード例 #3
0
def draw_reviewer_similarity_multiprocess():
    """Group review contents by reviewer and hand them to worker processes.

    Rows of ``fu.structure`` are read in order; ``row[0]`` is used as the
    reviewer key and ``row[-1]`` is appended to that reviewer's list.  The
    resulting ``{reviewer: [content, ...]}`` dicts are put on a queue in
    batches of 1000 reviewers and consumed by workers running
    ``write_reviewer_similarity_to_file(queue, lock, index)``.  One
    ``'STOP'`` sentinel per worker is queued after the last batch.

    NOTE(review): membership is only checked against the current batch, so a
    reviewer whose rows are not contiguous in ``fu.structure`` can end up in
    more than one batch -- confirm the input is grouped by reviewer.

    Returns:
        Seconds elapsed from the start of the call until all workers have
        been joined.
    """
    q = Queue()
    lock = Lock()
    start = time.time()
    fu = file_util.FileUtil()
    fu.open_file('../AmazonDataBackup/reviewsNew.txt')
    fu.get_structure()
    print('finish get_structure() with %s s' % (time.time() - start))

    # Computed once so the number of workers and of 'STOP' sentinels always
    # agree; at least one worker is started even on a single-core machine.
    worker_count = max(1, cpu_count() // 2)
    process_list = []
    for i in range(worker_count):
        p = Process(target=write_reviewer_similarity_to_file, args=(q, lock, i))
        p.start()
        process_list.append(p)

    count = 0
    reviewer_content_dict = {}
    for line in fu.structure:
        reviewer = line[0]
        # Test the dict directly: `.keys()` builds a list on Python 2.
        if reviewer not in reviewer_content_dict:
            # Flush only when a new reviewer shows up, so a run of
            # consecutive rows for one reviewer stays in one batch.
            if count % 1000 == 0 and count > 0:
                q.put(reviewer_content_dict)
                reviewer_content_dict = {}
            # Crude throttle: pause briefly while the workers are behind.
            if not q.empty():
                time.sleep(0.01)
            reviewer_content_dict[reviewer] = []
            count += 1
        reviewer_content_dict[reviewer].append(line[-1])
    q.put(reviewer_content_dict)

    for _ in range(worker_count):
        q.put('STOP')
    print('finish puting with %s s' % (time.time() - start))
    for p in process_list:
        p.join()
    finish_time = time.time() - start
    print('exit main with %s s' % finish_time)
    return finish_time
コード例 #4
0
    global content_2_grams_list
    while not exitFlag:
        if not workQueue.empty():
            # data = q.get()
            content_list = q.get()
            content_2_grams_list = content_2_grams_list + [summary_plot.get_2_grams(content) for content in content_list]
            print "%s processing %s" % (threadName, len(content_2_grams_list))
            queueLock.release()
        else:
            queueLock.release()

def compute_jaccard_distance(threadName, q):
    while not exitFlag:
        

# Load the review file and keep the first 100 items from get_content_list().
fu = file_util.FileUtil()
fu.open_file('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
fu.get_structure()
content_list = fu.get_content_list()[0:100]

# Names passed to the threads started below (one thread per name).
threadList = ["Thread-1", "Thread-2", "Thread-3"]
# nameList = ["One", "Two", "Three", "Four", "Five"]
queueLock = threading.Lock()
# Presumably the stdlib Queue module, in which case 10 is the maximum
# number of queued items -- TODO confirm against the file's imports.
workQueue = Queue.Queue(10)
threads = []
threadID = 1

# Create and start the new threads.
# NOTE(review): in the lines visible here threadID is never incremented and
# `threads` is never appended to -- confirm whether the loop body continues.
for tName in threadList:
    thread = myThread(threadID, tName, workQueue)
    thread.start()