argparser.add_argument("-t", "--tokenizer", dest="tokenizer", help="Name of the tokenizer", default="split")
    args = argparser.parse_args()
    if not args.input:
        argparser.error("No input file defined")
    if not args.output:
        argparser.error("No output file defined")
    return args

if __name__ == "__main__":
    args = getOptions()

    # Read the gzipped input file and parse it into documents.
    with gzip.open(args.input, 'r') as f:
        documents = GraphMatrices.readInstances(f)

    # Build adjacency-matrix instances for every full sentence.
    # NOTE(review): `settings` is assumed to be a module-level config object
    # defined elsewhere in this file — confirm against the full source.
    instances = GraphMatrices.buildAMFromFullSentences(documents,
        MatrixBuilders.buildAdjacencyMatrix, settings, args.parser,
        args.tokenizer)

    # Flatten the document -> sentence -> instance nesting and keep only the
    # first two fields of each instance, in a single pass. (The original
    # built the full list and then rebuilt it a second time just to slice
    # each tuple, binding an unused `identity` key along the way.)
    datavector = [(instance[0], instance[1])
                  for document in instances.itervalues()
                  for sentence in document.itervalues()
                  for instance in sentence.itervalues()]
    fmap = GraphMatrices.buildDictionary(datavector)

    # Dump the feature map, one "<key> <index>" entry per line.
    # Iterate items directly instead of keys()-then-index, which avoids a
    # second dict lookup per entry.
    with gzip.open(args.output, 'w') as dict_out:
        for key, index in fmap.iteritems():
            dict_out.write(key + " %d\n" % (index))