Exemple #1
0
                    value = row[index]
                    if value != 0:
                        f.write('(' + str(index) + ',' + str(value) + ')\t')
            f.write('\n')
            for (tid, feature) in per_cluster.collect():
                f.write(tid + '\t' + str(count) + '\n')
                fi.write(tid + '\t' + str(count) + '\n')
            """
            for (tid, feature) in per_cluster.collect():
                f.write(tid)
                for row in feature.toarray():
                    for unit in range(len(row)):
                        f.write('\t')
                        f.write(str(row[unit]))
                f.write('\n')
            """
            f.close()
    fi.close()

    sc.stop()
    return


if __name__ == '__main__':

    weibo_file = '../data/no_weibo.txt'
    result_file = 'results/initial.txt'
    print "start", now()
    load_cut_to_rdd(local2mfs(weibo_file), result_file)
    print "end", now()
Exemple #2
0
        for (tid, feature) in per_cluster.collect():
            f.write(tid)
            """
            for row in feature.toarray():
                for unit in range(len(row)):
                    f.write('\t')
                    f.write(str(row[unit]))
            """
            f.write('\n')
        f.close()

    sc.stop()
    return

if __name__ == '__main__':

    # topic = "APEC-微博"
    # print topic
    input_file = "../data/source_chaijing.txt"
    output_file = "../data/out_chaijing2.txt"
    result_file = "results/result_chaijing.txt"
    print "step1", now()
    # load_data_from_mongo(topic, input_file)

    print "step2", now()
    # cut_words_local(input_file, output_file)

    print "step3", now()
    load_cut_to_rdd(local2mfs(output_file), result_file)
    print "end", now()
Exemple #3
0
            f.write(str(unit))
        f.write("\n")
    for (index, (dist, num)) in cluster_variance.collect():
        f.write(str(index))
        f.write("\t")
        f.write(str(dist))
        f.write("\t")
        f.write(str(num))
        f.write("\n")
    f.close()

    sc.stop()
    return


if __name__ == "__main__":
    topic = "APEC-微博"
    print topic
    input_file = "data/source_APEC.txt"
    output_file = "data/out_APEC.txt"
    result_file = "data/result_APEC.txt"
    print "step1", now()
    load_data_from_mongo(topic, input_file)

    print "step2", now()
    cut_words_local(input_file, output_file)

    print "step3", now()
    load_cut_to_rdd(local2mfs(output_file), result_file)
    print "end", now()
                for index in range(len(row)):
                    value = row[index]
                    if value != 0:
                        f.write('('+str(index)+','+str(value)+')\t')
            f.write('\n')
            for (tid, feature) in per_cluster.collect():
                f.write(tid+'\t'+str(count)+'\n')
                fi.write(tid+'\t'+str(count)+'\n')
            """
            for (tid, feature) in per_cluster.collect():
                f.write(tid)
                for row in feature.toarray():
                    for unit in range(len(row)):
                        f.write('\t')
                        f.write(str(row[unit]))
                f.write('\n')
            """
            f.close()
    fi.close()

    sc.stop()
    return

if __name__ == '__main__':

    weibo_file = '../data/no_weibo.txt'
    result_file = 'results/initial.txt'
    print "start", now()
    load_cut_to_rdd(local2mfs(weibo_file), result_file)
    print "end", now()