# --- Fragment: tail of a cluster-result-writing function (its `def` header and
# --- enclosing loop headers are above this view); indentation reconstructed.
    # Inside unseen loops over rows/indices of a sparse feature vector:
    # emit only non-zero entries as "(index,value)\t"; '\n' ends the row.
    value = row[index]
    if value != 0:
        f.write('(' + str(index) + ',' + str(value) + ')\t')
    f.write('\n')

    # One "<tid>\t<count>" line per clustered item, written to both outputs.
    # NOTE(review): `count` is not defined in this fragment — presumably the
    # cluster id/size from the enclosing scope; confirm against the full function.
    for (tid, feature) in per_cluster.collect():
        f.write(tid + '\t' + str(count) + '\n')
        fi.write(tid + '\t' + str(count) + '\n')

    # Dead code kept as a dangling string literal: dense-vector dump alternative.
    """
    for (tid, feature) in per_cluster.collect():
        f.write(tid)
        for row in feature.toarray():
            for unit in range(len(row)):
                f.write('\t')
                f.write(str(row[unit]))
        f.write('\n')
    """

    f.close()
    fi.close()
    sc.stop()  # shut down the SparkContext
    return


if __name__ == '__main__':
    # Python 2 script entry point: run the RDD pipeline on the pre-cut weibo file.
    weibo_file = '../data/no_weibo.txt'
    result_file = 'results/initial.txt'
    print "start", now()
    load_cut_to_rdd(local2mfs(weibo_file), result_file)
    print "end", now()
# --- Fragment: tail of a cluster-result-writing function (its `def` header is
# --- above this view); indentation reconstructed.
    # Write one line per clustered item: tid only (dense dump disabled below).
    for (tid, feature) in per_cluster.collect():
        f.write(tid)
        # Dead code kept as a dangling string literal: would dump the dense vector.
        """
        for row in feature.toarray():
            for unit in range(len(row)):
                f.write('\t')
                f.write(str(row[unit]))
        """
        f.write('\n')
    f.close()
    sc.stop()  # shut down the SparkContext
    return


if __name__ == '__main__':
    # Python 2 entry point; the mongo-fetch and word-cut steps are commented out,
    # so only the RDD clustering step runs on the pre-cut chaijing file.
    # topic = "APEC-微博"
    # print topic
    input_file = "../data/source_chaijing.txt"
    output_file = "../data/out_chaijing2.txt"
    result_file = "results/result_chaijing.txt"
    print "step1", now()
    # load_data_from_mongo(topic, input_file)
    print "step2", now()
    # cut_words_local(input_file, output_file)
    print "step3", now()
    load_cut_to_rdd(local2mfs(output_file), result_file)
    print "end", now()
# --- Fragment: tail of a result-writing function (its `def` header and the
# --- loop headers for the first two writes are above this view);
# --- indentation reconstructed.
        # Inside unseen nested loops: write one vector component, then end the line.
        f.write(str(unit))
    f.write("\n")

    # Per-cluster variance summary, one "<index>\t<dist>\t<num>" line each.
    # NOTE(review): presumably dist = within-cluster distance and num = member
    # count — confirm against where `cluster_variance` is built.
    for (index, (dist, num)) in cluster_variance.collect():
        f.write(str(index))
        f.write("\t")
        f.write(str(dist))
        f.write("\t")
        f.write(str(num))
        f.write("\n")

    f.close()
    sc.stop()  # shut down the SparkContext
    return


if __name__ == "__main__":
    # Python 2 entry point: full APEC-topic pipeline
    # (mongo fetch -> local word cut -> RDD clustering).
    topic = "APEC-微博"
    print topic
    input_file = "data/source_APEC.txt"
    output_file = "data/out_APEC.txt"
    result_file = "data/result_APEC.txt"
    print "step1", now()
    load_data_from_mongo(topic, input_file)
    print "step2", now()
    cut_words_local(input_file, output_file)
    print "step3", now()
    load_cut_to_rdd(local2mfs(output_file), result_file)
    print "end", now()
# --- Fragment: tail of a cluster-result-writing function (its `def` header and
# --- the loop producing `row` are above this view); indentation reconstructed.
    # Sparse output: inside an unseen loop over rows, write each non-zero
    # feature as "(index,value)\t"; one '\n' terminates the row.
    for index in range(len(row)):
        value = row[index]
        if value != 0:
            f.write('('+str(index)+','+str(value)+')\t')
    f.write('\n')

    # One "<tid>\t<count>" line per clustered item, written to both outputs.
    # NOTE(review): `count` is not defined in this fragment — presumably the
    # cluster id/size from the enclosing scope; confirm against the full function.
    for (tid, feature) in per_cluster.collect():
        f.write(tid+'\t'+str(count)+'\n')
        fi.write(tid+'\t'+str(count)+'\n')

    # Dead code kept as a dangling string literal: dense-vector dump alternative.
    """
    for (tid, feature) in per_cluster.collect():
        f.write(tid)
        for row in feature.toarray():
            for unit in range(len(row)):
                f.write('\t')
                f.write(str(row[unit]))
        f.write('\n')
    """

    f.close()
    fi.close()
    sc.stop()  # shut down the SparkContext
    return


if __name__ == '__main__':
    # Python 2 entry point: run the RDD pipeline on the pre-cut weibo file.
    weibo_file = '../data/no_weibo.txt'
    result_file = 'results/initial.txt'
    print "start", now()
    load_cut_to_rdd(local2mfs(weibo_file), result_file)
    print "end", now()