예제 #1
0
        ccstr = record["cc"].strip().replace("*", "")
        ccs = filter_categories(cc.strip() for cc in ccstr.split(" "))
        for cc in ccs:
            if count.has_key(cc):
                count[cc] = count[cc] + 1
            else:
                count[cc] = 1
    return count


if __name__ == "__main__":
    # Command-line entry point: counts the categories found in a ZBL records
    # file and stores the result as two separate vectors (labels and counts).
    args = sys.argv
    if len(args) != 3:
        # Parenthesised single-argument form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("[ERROR] Exactly two arguments are expected: input-zbl-file-path output-count-path-prefix")
        # sys.exit() is used rather than the bare exit() helper: exit() is
        # injected by the `site` module for interactive use and is missing
        # when the interpreter runs without it (e.g. `python -S`).
        sys.exit(-1)
    zblInPath = args[1]
    statsOutPath = args[2]

    # Counting. The input file stays open until counting has finished,
    # because the record reader may consume it lazily (assumption -- TODO
    # confirm against zbl_io), and is closed afterwards even on error.
    with open(zblInPath) as zbl_file:
        zbl_src = zbl_io.read_zbl_records(zbl_file)
        count = count_categories(zbl_src, filter_XXY_categories)

    # Write to file: labels and counts go to two files sharing the given
    # prefix. keys() and values() of an unmodified dict are returned in
    # matching order, so entry i of one file pairs with entry i of the other.
    io.fwrite_vector(statsOutPath + "_labels.svector", count.keys())
    io.fwrite_vector(statsOutPath + "_count.ivector", count.values())
예제 #2
0
        ccs     = filter_categories( cc.strip() for cc in ccstr.split(' ') )
        for cc in ccs:
            if count.has_key(cc):
                count[cc] = count[cc] + 1
            else:
                count[cc] = 1
    return count

if __name__ == "__main__":
    # Script entry point: reads ZBL records from the first argument, counts
    # their categories and writes labels/counts under the second argument,
    # which is used as an output path prefix.
    args = sys.argv
    if len(args) != 3:
        # The parenthesised form behaves identically as a Python 2 print
        # statement and as a Python 3 print() call.
        print("[ERROR] Exactly two arguments are expected: input-zbl-file-path output-count-path-prefix")
        # Use sys.exit(): the bare exit() name is only provided by the `site`
        # module for interactive sessions and is not guaranteed to exist.
        sys.exit(-1)
    zblInPath = args[1]
    statsOutPath = args[2]

    # Counting. Done inside the `with` block so the file handle is still open
    # if the reader yields records lazily (assumption -- TODO confirm), and
    # so the handle is always released, even when counting raises.
    with open(zblInPath) as zbl_file:
        zbl_src = zbl_io.read_zbl_records(zbl_file)
        count = count_categories(zbl_src, filter_XXY_categories)

    # Write to file: one vector with the labels, one with the counts.
    # An unmodified dict returns keys() and values() in corresponding order,
    # so the two vectors line up position by position.
    io.fwrite_vector(statsOutPath + "_labels.svector", count.keys())
    io.fwrite_vector(statsOutPath + "_count.ivector", count.values())

def smatrix_single_count(matrix, labels):
    """Build a {category: count} dictionary from a co-occurrence matrix.

    The count of the category at position ``i`` in ``labels`` is read from
    the diagonal entry ``matrix[i][i]``.

    Args:
        matrix: Symmetric co-occurrence matrix, indexable as ``matrix[i][i]``.
        labels: Category labels; ``labels[i]`` names row/column ``i``.
            Labels must be hashable. If a label occurs more than once, the
            entry with the highest index wins.

    Returns:
        A dictionary mapping every label to its diagonal value; empty when
        ``labels`` is empty.
    """
    # enumerate() replaces the xrange(len(...)) index loop: it is the
    # idiomatic form and, unlike xrange, exists on both Python 2 and 3.
    return dict((label, matrix[i][i]) for i, label in enumerate(labels))



if __name__ == "__main__":
    # Script entry point: loads a categories co-occurrence matrix and stores
    # single-category and category-pair counts as label/count vector files.
    args = sys.argv
    if len(args) != 3:
        # The parenthesised form prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("[ERROR] Exactly two arguments are expected: input-categories-coocurrence-matrix-path output-stats-path-prefix")
        # sys.exit() instead of the bare exit(): the latter is an interactive
        # helper added by the `site` module and may be absent in programs.
        sys.exit(-1)
    matrixInPath = args[1]
    statsOutPath = args[2]

    # Load the matrix data and its labels from the same input path; only the
    # first element of the labels result is used.
    matrix = io.fread_smatrix_data(matrixInPath)
    labels = io.fread_smatrix_labels(matrixInPath)[0]

    # Counting.
    single_count = smatrix_single_count(matrix, labels)
    pair_count = smatrix_pair_count(matrix, labels)

    # Storing: each result is split into a labels file and a counts file.
    # keys() and values() of an unmodified dict come back in matching order,
    # so entry i of a labels file pairs with entry i of its counts file.
    io.fwrite_vector(statsOutPath + "_single_labels.svector", single_count.keys())
    io.fwrite_vector(statsOutPath + "_single_count.ivector", single_count.values())
    io.fwrite_vector(statsOutPath + "_pair_labels.svector", pair_count.keys())
    io.fwrite_vector(statsOutPath + "_pair_count.ivector", pair_count.values())