Ejemplo n.º 1
0
def importFromIterator(
        outfile,
        tablename,
        iterator,
        columns=None,
        indices=None):
    '''import data from an iterator into a database.

    The rows are written as tab-separated text to a temporary file
    obtained from ``getTempFile(".")``. That file is handed to
    :func:`load` and removed afterwards, whether or not writing or
    loading succeeded.

    Arguments
    ---------
    outfile : string
        Output file name
    tablename : string
        Table name
    iterator : iterator
        Iterator to import data from. Every row is indexed with the
        column keys, so rows are typically dictionaries. Lists/tuples
        work when `columns` is a dictionary keyed on integer positions.
    columns : dict or list
        If a dictionary, it maps the key used to look up a value in
        each row to the column name written to the header line. If a
        list, its items serve both as row keys and as column names.
        If not given, the assumption is that iterator will yield
        dictionaries and column names are derived from the keys of
        the first row.
    indices : list
        List of column names to add indices on.
    '''

    tmpfile = getTempFile(".")

    try:
        try:
            # keys: how to look up each value in a row;
            # headers: what to call the corresponding column.
            keys = None
            if columns:
                if hasattr(columns, "items"):
                    keys, headers = zip(*columns.items())
                else:
                    keys = headers = list(columns)
                tmpfile.write("\t".join(headers) + "\n")

            for row in iterator:
                if keys is None:
                    # No columns supplied: take them from the first row
                    # itself (not from row[0], which is a single value).
                    keys = list(row.keys())
                    tmpfile.write("\t".join(keys) + "\n")

                tmpfile.write("\t".join(str(row[x]) for x in keys) + "\n")
        finally:
            # Flush the data to disk before load() reads the file, and
            # do not leak the handle if the iterator raises.
            tmpfile.close()

        if indices:
            options = " ".join("--add-index=%s" % x for x in indices)
        else:
            options = ""

        load(tmpfile.name,
             outfile,
             tablename=tablename,
             options=options)
    finally:
        # Always remove the temporary file, even when loading failed.
        os.unlink(tmpfile.name)
Ejemplo n.º 2
0
def importFromIterator(
        outfile,
        tablename,
        iterator,
        columns=None,
        indices=None):
    '''import data from an iterator into a database.

    The rows are written as tab-separated text to a temporary file
    obtained from ``getTempFile(".")``. That file is handed to
    :func:`load` and removed afterwards, whether or not writing or
    loading succeeded.

    Arguments
    ---------
    outfile : string
        Output file name
    tablename : string
        Table name
    iterator : iterator
        Iterator to import data from. Every row is indexed with the
        column keys, so rows are typically dictionaries. Lists/tuples
        work when `columns` is a dictionary keyed on integer positions.
    columns : dict or list
        If a dictionary, it maps the key used to look up a value in
        each row to the column name written to the header line. If a
        list, its items serve both as row keys and as column names.
        If not given, the assumption is that iterator will yield
        dictionaries and column names are derived from the keys of
        the first row.
    indices : list
        List of column names to add indices on.
    '''

    tmpfile = getTempFile(".")

    try:
        try:
            # keys: how to look up each value in a row;
            # headers: what to call the corresponding column.
            keys = None
            if columns:
                if hasattr(columns, "items"):
                    keys, headers = zip(*columns.items())
                else:
                    keys = headers = list(columns)
                tmpfile.write("\t".join(headers) + "\n")

            for row in iterator:
                if keys is None:
                    # No columns supplied: take them from the first row
                    # itself (not from row[0], which is a single value).
                    keys = list(row.keys())
                    tmpfile.write("\t".join(keys) + "\n")

                tmpfile.write("\t".join(str(row[x]) for x in keys) + "\n")
        finally:
            # Flush the data to disk before load() reads the file, and
            # do not leak the handle if the iterator raises.
            tmpfile.close()

        if indices:
            options = " ".join("--add-index=%s" % x for x in indices)
        else:
            options = ""

        load(tmpfile.name,
             outfile,
             tablename=tablename,
             options=options)
    finally:
        # Always remove the temporary file, even when loading failed.
        os.unlink(tmpfile.name)
Ejemplo n.º 3
0
    def buildJobScript(statement, job_memory, job_name):
        '''build job script from statement.

        A bash script is written to a temporary file created in
        ``PARAMS["workingdir"]``. The script appends diagnostic
        output (shell variables, loaded modules, hostname and
        /proc/meminfo), prefixed with `job_name`, to `shellfile`,
        limits virtual memory with ``ulimit -v`` and finally runs
        the expanded `statement`. `shellfile` and
        `ignore_pipe_errors` are not parameters; they are defined
        outside this function.

        Arguments
        ---------
        statement : string
            Statement that is passed through ``expandStatement`` and
            written to the script.
        job_memory : string
            Memory limit, converted with ``IOTools.human2bytes``.
        job_name : string
            Name used to label the diagnostic lines.

        Returns
        -------
        tuple
            (name_of_script, stdout_path, stderr_path). The script
            path is absolute; the other two are the script path with
            ``.stdout`` and ``.stderr`` appended.
        '''

        tmpfile = getTempFile(dir=PARAMS["workingdir"])
        try:
            # disabled: -l -O expand_aliases\n" )
            tmpfile.write("#!/bin/bash\n")
            tmpfile.write(
                'echo "%s : START -> %s" >> %s\n' %
                (job_name, tmpfile.name, shellfile))
            # disabled - problems with quoting
            # tmpfile.write( '''echo 'statement=%s' >> %s\n''' %
            # (shellquote(statement), shellfile) )
            tmpfile.write("set | sed 's/^/%s : /' &>> %s\n" %
                          (job_name, shellfile))
            # module list outputs to stderr, so merge stderr and stdout
            tmpfile.write("module list 2>&1 | sed 's/^/%s: /' &>> %s\n" %
                          (job_name, shellfile))
            tmpfile.write("hostname | sed 's/^/%s: /' &>> %s\n" %
                          (job_name, shellfile))
            tmpfile.write("cat /proc/meminfo | sed 's/^/%s: /' &>> %s\n" %
                          (job_name, shellfile))
            tmpfile.write(
                'echo "%s : END -> %s" >> %s\n' %
                (job_name, tmpfile.name, shellfile))

            # restrict virtual memory
            # Note that there are resources in SGE which could do this
            # directly such as v_hmem.
            # Note that limiting resident set sizes (RSS) with ulimit is
            # not possible in newer kernels.
            # bash's ``ulimit -v`` takes its limit in units of 1024
            # bytes, so convert the byte count and round up so that the
            # limit is never below the requested amount.
            # NOTE(review): assumes IOTools.human2bytes returns a number
            # of bytes - confirm against its definition.
            memory_bytes = IOTools.human2bytes(job_memory)
            memory_kib = int(-(-memory_bytes // 1024))
            tmpfile.write("ulimit -v %i\n" % memory_kib)

            tmpfile.write(
                expandStatement(
                    statement,
                    ignore_pipe_errors=ignore_pipe_errors) + "\n")
        finally:
            # Close the handle even if building the script failed.
            tmpfile.close()

        job_path = os.path.abspath(tmpfile.name)
        stdout_path = job_path + ".stdout"
        stderr_path = job_path + ".stderr"

        # The script must be executable by user and group.
        os.chmod(job_path, stat.S_IRWXG | stat.S_IRWXU)

        return (job_path, stdout_path, stderr_path)