def save(data, outputdir, outputfile, outputformat): """ Save data to a variety of formats Automatically determines whether data is an array or an RDD and handles appropriately For RDDs, data are sorted and reshaped based on the keys :param data: RDD of key value pairs or array :param outputdir: Location to save data to :param outputfile: file name to save data to :param outputformat: format for data ("matlab", "text", or "image") """ filename = os.path.join(outputdir, outputfile) if (outputformat == "matlab") | (outputformat == "text"): if isrdd(data): dims = getdims(data) data = subtoind(data, dims.max) keys = data.map(lambda (k, _): int(k)).collect() nout = size(data.first()[1]) if nout > 1: for iout in range(0, nout): result = data.map(lambda (_, v): float16(v[iout])).collect() result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)]) if outputformat == "matlab": savemat(filename+"-"+str(iout)+".mat", mdict={outputfile+str(iout): squeeze(transpose(reshape(result, dims.num[::-1])))}, oned_as='column', do_compression='true') if outputformat == "text": savetxt(filename+"-"+str(iout)+".txt", result, fmt="%.6f") else: result = data.map(lambda (_, v): float16(v)).collect() result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)])
result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)]) if outputformat == "matlab": savemat(filename+".mat", mdict={outputfile: squeeze(transpose(reshape(result, dims.num[::-1])))}, oned_as='column', do_compression='true') if outputformat == "text": savetxt(filename+".txt", result, fmt="%.6f") else: if outputformat == "matlab": savemat(filename+".mat", mdict={outputfile: data}, oned_as='column', do_compression='true') if outputformat == "text": savetxt(filename+".txt", data, fmt="%.6f") if outputformat == "image": if isrdd(data): data = rescale(data) dims = getdims(data) data = subtoind(data, dims.max) keys = data.map(lambda (k, _): int(k)).collect() nout = size(data.first()[1]) if nout > 1: for iout in range(0, nout): result = data.map(lambda (_, v): v[iout]).collect() result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)]) arraytoim(squeeze(transpose(reshape(result, dims.num[::-1]))), filename+"-"+str(iout)) else: result = data.map(lambda (_, v): v).collect() result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)]) arraytoim(squeeze(transpose(reshape(result, dims.num[::-1]))), filename) else:
def _collect_sorted(data, index=None, convert=None):
    """
    Collect the values of a keyed RDD into an array ordered by integer key

    Keys and values are fetched together in a single job, so every value
    stays paired with its own key regardless of the order in which the
    records are returned

    :param data: RDD of (key, value) pairs whose keys can be cast to int
    :param index: if not None, keep only value[index] from each record
    :param convert: if not None, callable applied to each kept value
    :return: array of the kept values, sorted by key
    """
    def extract(record):
        value = record[1]
        if index is not None:
            value = value[index]
        if convert is not None:
            value = convert(value)
        return int(record[0]), value

    pairs = sorted(data.map(extract).collect(), key=lambda pair: pair[0])
    return array([value for _, value in pairs])


def _write_output(values, path, name, outputformat, shape=None):
    """
    Write one array to disk in the requested format

    :param values: array to write
    :param path: output path without extension
    :param name: variable name used inside the .mat file
    :param outputformat: "matlab", "text", or "image"
    :param shape: if not None, "matlab" and "image" outputs are reshaped to
        this shape, transposed and squeezed first; "text" output always
        writes the values as given
    """
    if shape is not None and outputformat != "text":
        values = squeeze(transpose(reshape(values, shape)))

    if outputformat == "matlab":
        savemat(path + ".mat", mdict={name: values},
                oned_as='column', do_compression=True)
    elif outputformat == "text":
        savetxt(path + ".txt", values, fmt="%.6f")
    else:
        arraytoim(values, path)


def save(data, outputdir, outputfile, outputformat):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array or an RDD
    and handles appropriately
    For RDDs, data are sorted and reshaped based on the keys;
    values are cast to float16 for "matlab" and "text" output, and
    passed through rescale for "image" output
    If each RDD value holds more than one element, one file is written
    per element, with "-<index>" appended to the file name

    :param data: RDD of key value pairs or array
    :param outputdir: Location to save data to (created if missing)
    :param outputfile: file name to save data to, without extension
    :param outputformat: format for data ("matlab", "text", or "image")
    :raises ValueError: if outputformat is not one of the supported formats
    """
    # fail before touching the filesystem instead of silently saving nothing
    if outputformat not in ("matlab", "text", "image"):
        raise ValueError(
            'unsupported outputformat %r: expected "matlab", "text", or "image"'
            % (outputformat,))

    # attempt the creation and tolerate "already exists", so that concurrent
    # writers to the same directory cannot race between a check and a create
    try:
        os.makedirs(outputdir)
    except OSError:
        if not os.path.isdir(outputdir):
            raise

    filename = os.path.join(outputdir, outputfile)

    # plain arrays are written as they are
    if not isrdd(data):
        _write_output(data, filename, outputfile, outputformat)
        return

    if outputformat == "image":
        data = rescale(data)
        convert = None
    else:
        convert = float16

    dims = getdims(data)
    data = subtoind(data, dims.max)
    shape = dims.num[::-1]
    nout = size(data.first()[1])

    if nout > 1:
        for iout in range(nout):
            result = _collect_sorted(data, iout, convert)
            _write_output(result, filename + "-" + str(iout),
                          outputfile + str(iout), outputformat, shape)
    else:
        result = _collect_sorted(data, None, convert)
        _write_output(result, filename, outputfile, outputformat, shape)