Esempio n. 1
0
def save(data, outputdir, outputfile, outputformat):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handles appropriately
    For RDDs, data are sorted and reshaped based on the keys

    Output paths are built from os.path.join(outputdir, outputfile); when the
    RDD values hold more than one element, one file is written per element
    with "-<index>" appended to the base name.

    :param data: RDD of key value pairs or array
    :param outputdir: Location to save data to
    :param outputfile: file name to save data to
    :param outputformat: format for data ("matlab", "text", or "image")
    """

    # Base path without extension; suffixes/extensions are appended per file.
    filename = os.path.join(outputdir, outputfile)

    # NOTE: "|" is the bitwise operator; it acts as a logical "or" here only
    # because both operands are parenthesised comparisons.
    if (outputformat == "matlab") | (outputformat == "text"):
        if isrdd(data):
            # Re-key the records using the per-dimension maxima from getdims
            # (presumably subscript -> linear index; confirm against subtoind).
            dims = getdims(data)
            data = subtoind(data, dims.max)
            # Keys are collected once and reused to order every output below.
            keys = data.map(lambda (k, _): int(k)).collect()
            # Number of elements in the first record's value.
            nout = size(data.first()[1])
            if nout > 1:
                for iout in range(0, nout):
                    # One collect per output element, values cast to float16.
                    result = data.map(lambda (_, v): float16(v[iout])).collect()
                    # Reorder values by ascending key. This pairs two separate
                    # collect() results positionally, so it relies on both
                    # returning records in the same order.
                    result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)])
                    if outputformat == "matlab":
                        # Values are reshaped to the reversed dims.num, then
                        # transposed and squeezed before being stored under the
                        # variable name outputfile + index.
                        # NOTE(review): do_compression is given the string
                        # 'true' rather than a boolean.
                        savemat(filename+"-"+str(iout)+".mat",
                                mdict={outputfile+str(iout): squeeze(transpose(reshape(result, dims.num[::-1])))},
                                oned_as='column', do_compression='true')
                    if outputformat == "text":
                        # Text output keeps the flat, key-ordered values.
                        savetxt(filename+"-"+str(iout)+".txt", result, fmt="%.6f")
            else:
                # Single-element values: cast each value as a whole.
                result = data.map(lambda (_, v): float16(v)).collect()
                result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)])
                # NOTE(review): this excerpt ends here -- `result` is built but
                # never written, and the non-RDD and "image" cases described in
                # the docstring are not present. The snippet looks truncated.
Esempio n. 2
0
 def test_get_dims_array(self):
     """getdims on locally built (non-RDD) records reports max, num and min."""
     # Every subscript of a 2 x 3 x 2 grid, first coordinate varying fastest.
     coords = [(x, y, z) for z in (1, 2) for y in (1, 2, 3) for x in (1, 2)]
     records = map(lambda sub: (sub, array([1.0])), coords)

     extent = getdims(records)

     upper = array([2, 3, 2])
     lower = array([1, 1, 1])
     assert allclose(extent.max, upper)
     assert allclose(extent.num, upper)
     assert allclose(extent.min, lower)
Esempio n. 3
0
 def test_get_dims_array(self):
     """Check the max/num/min fields getdims returns for local records."""
     # Subscripts covering a full 2 x 3 x 2 grid, each paired with a dummy
     # one-element value.
     subscripts = [
         (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
         (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
     ]
     local_records = map(lambda s: (s, array([1.0])), subscripts)
     found = getdims(local_records)

     # Checked in the same order as the individual assertions: max, num, min.
     expectations = (
         ("max", [2, 3, 2]),
         ("num", [2, 3, 2]),
         ("min", [1, 1, 1]),
     )
     for field, wanted in expectations:
         assert allclose(getattr(found, field), array(wanted))
Esempio n. 4
0
def query(data, indsfile):
    """Query data by averaging together
    data points with the given indices

    Keys are first converted to linear indices; then, for each index group
    held by the loaded "query" method, the values whose key is in that group
    are averaged into one row of the result.

    :param data: RDD of data points as key value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages, one row per index group
    """
    # load indices
    method = SigProcessingMethod.load("query", indsfile=indsfile)

    # convert to linear indexing
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((method.n, len(data.first()[1])))
    for i in range(method.n):
        # Broadcast only this group's indices: referencing method.inds[i]
        # directly inside the lambda would pull the whole `method` object
        # into every task closure.
        indsb = data.context.broadcast(method.inds[i])
        # The action (.mean()) runs before the next iteration rebinds indsb,
        # so the late-bound closure always sees the current group.
        selected = data.filter(lambda kv: kv[0] in indsb.value)
        ts[i, :] = selected.map(lambda kv: kv[1]).mean()

    return ts
Esempio n. 5
0
def query(data, indsfile):
    """Average the records selected by each group of query indices.

    The indices are loaded through the "query" signal-processing method and
    the record keys are converted to linear indices before matching.

    :param data: RDD of data points as key value pairs
    :param indsfile: string with file location or array

    :return ts: array with averages
    """
    # Index groups to query.
    selector = SigProcessingMethod.load("query", indsfile=indsfile)

    # Switch the keys to linear indexing.
    extents = getdims(data)
    indexed = subtoind(data, extents.max)

    # One output row per index group, as wide as a single record's value.
    group_count = selector.n
    width = len(indexed.first()[1])
    averages = zeros((group_count, width))

    for row in range(group_count):
        # Ship this group's indices to the workers as a broadcast variable.
        wanted = indexed.context.broadcast(selector.inds[row])
        members = indexed.filter(lambda rec: rec[0] in wanted.value)
        averages[row, :] = members.map(lambda rec: rec[1]).mean()

    return averages
Esempio n. 6
0
                    savemat(filename+".mat", mdict={outputfile: squeeze(transpose(reshape(result, dims.num[::-1])))},
                            oned_as='column', do_compression='true')
                if outputformat == "text":
                    savetxt(filename+".txt", result, fmt="%.6f")

        else:
            if outputformat == "matlab":
                savemat(filename+".mat", mdict={outputfile: data}, oned_as='column', do_compression='true')
            if outputformat == "text":
                savetxt(filename+".txt", data, fmt="%.6f")

    if outputformat == "image":

        if isrdd(data):
            data = rescale(data)
            dims = getdims(data)
            data = subtoind(data, dims.max)
            keys = data.map(lambda (k, _): int(k)).collect()
            nout = size(data.first()[1])
            if nout > 1:
                for iout in range(0, nout):
                    result = data.map(lambda (_, v): v[iout]).collect()
                    result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)])
                    arraytoim(squeeze(transpose(reshape(result, dims.num[::-1]))), filename+"-"+str(iout))
            else:
                result = data.map(lambda (_, v): v).collect()
                result = array([v for (k, v) in sorted(zip(keys, result), key=lambda (k, v): k)])
                arraytoim(squeeze(transpose(reshape(result, dims.num[::-1]))), filename)
        else:
            arraytoim(data, filename)
Esempio n. 7
0
def save(data, outputdir, outputfile, outputformat):
    """
    Save data to a variety of formats
    Automatically determines whether data is an array
    or an RDD and handles appropriately
    For RDDs, data are sorted and reshaped based on the keys

    The output directory is created if it does not exist. Files are named
    after os.path.join(outputdir, outputfile); when the RDD values hold more
    than one element, one file per element is written with "-<index>"
    appended to the base name. Any other outputformat writes nothing.

    :param data: RDD of key value pairs or array
    :param outputdir: Location to save data to
    :param outputfile: file name to save data to
    :param outputformat: format for data ("matlab", "text", or "image")
    """

    # Create-then-check instead of check-then-create: avoids failing when
    # another process creates the directory between the test and makedirs.
    try:
        os.makedirs(outputdir)
    except OSError:
        if not os.path.isdir(outputdir):
            raise

    filename = os.path.join(outputdir, outputfile)

    if outputformat in ("matlab", "text"):
        if isrdd(data):
            # convert to linear indexing
            dims = getdims(data)
            data = subtoind(data, dims.max)
            # Keys are collected once and reused to order every output.
            keys = data.map(lambda kv: int(kv[0])).collect()
            nout = size(data.first()[1])
            if nout > 1:
                for iout in range(nout):
                    # collect() runs immediately, so the lambda sees the
                    # current iout despite late binding.
                    values = data.map(
                        lambda kv: float16(kv[1][iout])).collect()
                    _write_table(_order_by_key(keys, values), dims,
                                 filename + "-" + str(iout),
                                 outputfile + str(iout), outputformat)
            else:
                values = data.map(lambda kv: float16(kv[1])).collect()
                _write_table(_order_by_key(keys, values), dims,
                             filename, outputfile, outputformat)
        elif outputformat == "matlab":
            # Plain arrays are written as-is, without reordering or reshaping.
            savemat(filename + ".mat",
                    mdict={outputfile: data},
                    oned_as='column',
                    do_compression=True)
        else:
            savetxt(filename + ".txt", data, fmt="%.6f")

    elif outputformat == "image":
        if isrdd(data):
            data = rescale(data)
            dims = getdims(data)
            data = subtoind(data, dims.max)
            keys = data.map(lambda kv: int(kv[0])).collect()
            nout = size(data.first()[1])
            if nout > 1:
                for iout in range(nout):
                    values = data.map(lambda kv: kv[1][iout]).collect()
                    arraytoim(_as_volume(_order_by_key(keys, values), dims),
                              filename + "-" + str(iout))
            else:
                values = data.map(lambda kv: kv[1]).collect()
                arraytoim(_as_volume(_order_by_key(keys, values), dims),
                          filename)
        else:
            arraytoim(data, filename)


def _order_by_key(keys, values):
    """Return `values` as an array, reordered by ascending paired key."""
    # keys and values come from separate collect() calls and are paired
    # positionally; sorting on the key alone keeps the sort stable.
    ordered = sorted(zip(keys, values), key=lambda pair: pair[0])
    return array([value for _, value in ordered])


def _as_volume(result, dims):
    """Reshape flat key-ordered values to reversed dims.num, transpose, squeeze."""
    return squeeze(transpose(reshape(result, dims.num[::-1])))


def _write_table(result, dims, path, varname, outputformat):
    """Write key-ordered values to path + ".mat" (as `varname`) or path + ".txt"."""
    if outputformat == "matlab":
        # do_compression is documented as a bool; the original passed the
        # truthy string 'true'.
        savemat(path + ".mat",
                mdict={varname: _as_volume(result, dims)},
                oned_as='column',
                do_compression=True)
    elif outputformat == "text":
        savetxt(path + ".txt", result, fmt="%.6f")