def singleprocess_datanum(self, test_dict):
    """Time the single-process similarity pipeline for several input sizes.

    Every key of ``test_dict`` is used as the number of reviews to take from
    the start of the review file. The value stored under that key is
    overwritten with the seconds spent in ``summary_plot.get_jd_list`` plus
    ``summary_plot.get_reviews_similarity_relation`` for that many reviews
    (building the 2-gram lists is not part of the measurement).

    Args:
        test_dict: Mapping whose keys are review counts; values are replaced
            in place with the measured durations.

    Returns:
        The same ``test_dict`` object, now holding elapsed seconds per key.
    """
    reader = file_util.FileUtil('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
    reader.get_structure()

    # Iterate over a snapshot of the keys because the values are rewritten
    # inside the loop.
    for sample_size in list(test_dict):
        print(sample_size)
        reviews = reader.get_content_list()[:sample_size]
        two_grams = summary_plot.get_2_grams_list(reviews)

        began = time.time()
        summary_plot.get_reviews_similarity_relation(
            summary_plot.get_jd_list(two_grams))
        test_dict[sample_size] = time.time() - began

    return test_dict
def draw_review_distance_multiprocess(list_num=-1, put_num=10000):
    """Stream every pair of review 2-gram lists to worker processes and time it.

    Reviews are read from ``reviewsNew.mP`` and turned into 2-gram lists with
    ``summary_plot.get_2_grams_list``. Every pair ``(i, j)`` with ``i < j`` is
    then sent, in batches of ``put_num`` pairs, through a queue to
    ``write_review_distance_to_file`` workers (presumably these compute the
    pair distance and write it under ``dirname`` -- confirm against that
    function). One ``'STOP'`` marker per worker is queued after the last batch.

    Args:
        list_num: Number of reviews, counted from the start of the file, to
            use. A negative value (the default ``-1``) or ``None`` means
            "all reviews".
        put_num: Number of pairs per queued batch; must be at least 1.

    Returns:
        Wall-clock seconds from just before the workers are started until
        every worker has exited.

    Raises:
        ValueError: If ``put_num`` is smaller than 1.
    """
    # NOTE(review): this function is defined a second time further down in
    # this file; the later definition is the one bound after import.

    # Validate before any process is spawned. A zero batch size used to fail
    # with ZeroDivisionError in the middle of the producer loop, after the
    # workers had already started waiting on the queue.
    if put_num < 1:
        raise ValueError('put_num must be >= 1, got %r' % (put_num,))

    fu = file_util.FileUtil()
    fu.open_file('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
    # Alternative input: '../AmazonDataBackup/reviewsNew/reviewsNew103.mP'
    fu.get_structure()

    content_list = fu.get_content_list()
    # The old ``[0:list_num]`` slice turned the default -1 into "everything
    # except the last review"; a negative count is treated as "no limit".
    if list_num is not None and list_num >= 0:
        content_list = content_list[:list_num]
    content_list_2_grams = summary_plot.get_2_grams_list(content_list)

    # Floor division keeps this an int on Python 3 as well, and max() makes
    # sure at least one worker exists on a single-core machine.
    worker_num = max(1, cpu_count() // 2)
    # Bounded queue: ``put`` blocks while the workers are behind. The previous
    # sleep-based throttle could never trigger because its counter was reset
    # before it could exceed ``put_num``, so pending batches grew unbounded.
    q = Queue(worker_num * 2)
    lock = Lock()

    start = time.time()
    process_list = []
    for worker_id in range(worker_num):
        p = Process(target=write_review_distance_to_file,
                    args=(q, lock, worker_id),
                    kwargs={'dirname': 'jaccard_distance_220000'})
        p.start()
        process_list.append(p)

    reviews_len = len(content_list_2_grams)
    print(reviews_len)

    try:
        grams_pair_list = []
        for i in range(reviews_len):
            first_grams = content_list_2_grams[i]
            for j in range(i + 1, reviews_len):
                # Flush a full batch before adding the next pair.
                if len(grams_pair_list) == put_num:
                    q.put(grams_pair_list)
                    grams_pair_list = []
                grams_pair_list.append([first_grams, content_list_2_grams[j]])
        # Remainder (possibly empty) is still queued, as before.
        q.put(grams_pair_list)
    finally:
        # Always release the workers, even if producing pairs failed;
        # otherwise they would wait on the queue forever.
        for _ in process_list:
            q.put('STOP')
        for p in process_list:
            p.join()

    finish_time = time.time() - start
    print('exit main with %s s' % finish_time)
    return finish_time
def draw_review_distance_multiprocess(list_num=-1, put_num=10000):
    """Stream every pair of review 2-gram lists to worker processes and time it.

    Reviews are read from ``reviewsNew.mP`` and turned into 2-gram lists with
    ``summary_plot.get_2_grams_list``. Every pair ``(i, j)`` with ``i < j`` is
    then sent, in batches of ``put_num`` pairs, through a queue to
    ``write_review_distance_to_file`` workers (presumably these compute the
    pair distance and write it under ``dirname`` -- confirm against that
    function). One ``"STOP"`` marker per worker is queued after the last batch.

    Args:
        list_num: Number of reviews, counted from the start of the file, to
            use. A negative value (the default ``-1``) or ``None`` means
            "all reviews".
        put_num: Number of pairs per queued batch; must be at least 1.

    Returns:
        Wall-clock seconds from just before the workers are started until
        every worker has exited.

    Raises:
        ValueError: If ``put_num`` is smaller than 1.
    """
    # NOTE(review): an identical definition of this function appears earlier
    # in this file; this later one is the binding that survives import.

    # Validate before any process is spawned. A zero batch size used to fail
    # with ZeroDivisionError in the middle of the producer loop, after the
    # workers had already started waiting on the queue.
    if put_num < 1:
        raise ValueError("put_num must be >= 1, got %r" % (put_num,))

    fu = file_util.FileUtil()
    fu.open_file("../AmazonDataBackup/reviewsNew/reviewsNew.mP")
    # Alternative input: "../AmazonDataBackup/reviewsNew/reviewsNew103.mP"
    fu.get_structure()

    content_list = fu.get_content_list()
    # The old ``[0:list_num]`` slice turned the default -1 into "everything
    # except the last review"; a negative count is treated as "no limit".
    if list_num is not None and list_num >= 0:
        content_list = content_list[:list_num]
    content_list_2_grams = summary_plot.get_2_grams_list(content_list)

    # Floor division keeps this an int on Python 3 as well, and max() makes
    # sure at least one worker exists on a single-core machine.
    worker_num = max(1, cpu_count() // 2)
    # Bounded queue: ``put`` blocks while the workers are behind. The previous
    # sleep-based throttle could never trigger because its counter was reset
    # before it could exceed ``put_num``, so pending batches grew unbounded.
    q = Queue(worker_num * 2)
    lock = Lock()

    start = time.time()
    process_list = []
    for worker_id in range(worker_num):
        p = Process(target=write_review_distance_to_file,
                    args=(q, lock, worker_id),
                    kwargs={"dirname": "jaccard_distance_220000"})
        p.start()
        process_list.append(p)

    reviews_len = len(content_list_2_grams)
    print(reviews_len)

    try:
        grams_pair_list = []
        for i in range(reviews_len):
            first_grams = content_list_2_grams[i]
            for j in range(i + 1, reviews_len):
                # Flush a full batch before adding the next pair.
                if len(grams_pair_list) == put_num:
                    q.put(grams_pair_list)
                    grams_pair_list = []
                grams_pair_list.append([first_grams, content_list_2_grams[j]])
        # Remainder (possibly empty) is still queued, as before.
        q.put(grams_pair_list)
    finally:
        # Always release the workers, even if producing pairs failed;
        # otherwise they would wait on the queue forever.
        for _ in process_list:
            q.put("STOP")
        for p in process_list:
            p.join()

    finish_time = time.time() - start
    print("exit main with %s s" % finish_time)
    return finish_time