def importFromIterator(
        outfile,
        tablename,
        iterator,
        columns=None,
        indices=None):
    '''import data from an iterator into a database.

    The rows are written to a temporary tab-separated file created
    with ``getTempFile(".")``. That file is handed to :func:`load`
    and removed afterwards, whether or not the import succeeded.

    Arguments
    ---------
    outfile : string
        Output file name
    tablename : string
        Table name
    iterator : iterator
        Iterator to import data from. The iterator should
        yield either list/tuples or dictionaries for each
        row in the table.
    columns : dict or list
        Column specification. If a dictionary, each key is used to
        look up a value in a row and the corresponding value is the
        column name written to the header. If a list, it holds the
        column names; dictionary rows are then looked up by column
        name and list/tuple rows by position. If not given, the
        iterator is expected to yield dictionaries and the column
        names are taken from the keys of the first row.
    indices : list
        List of column names to add indices on.
    '''

    tmpfile = getTempFile(".")

    try:
        try:
            # keys: what to index each row with; header: column names.
            # keys stays None when columns is a plain list, in which
            # case the lookup is decided per row (see below).
            keys = None
            header = None

            if columns:
                if hasattr(columns, "items"):
                    keys, header = zip(*columns.items())
                else:
                    header = list(columns)
                tmpfile.write("\t".join(str(x) for x in header) + "\n")

            for row in iterator:
                if header is None:
                    # no columns supplied: derive them from the first
                    # row itself, which must be a dictionary.
                    keys = header = list(row.keys())
                    tmpfile.write(
                        "\t".join(str(x) for x in header) + "\n")

                if keys is not None:
                    fields = keys
                elif hasattr(row, "keys"):
                    fields = header
                else:
                    fields = range(len(header))

                tmpfile.write(
                    "\t".join(str(row[x]) for x in fields) + "\n")
        finally:
            # the file has to be closed (flushed) before it is loaded.
            tmpfile.close()

        if indices:
            options = " ".join("--add-index=%s" % x for x in indices)
        else:
            options = ""

        load(tmpfile.name,
             outfile,
             tablename=tablename,
             options=options)
    finally:
        # do not leave the temporary file behind if anything failed.
        os.unlink(tmpfile.name)
def buildJobScript(statement, job_memory, job_name):
    '''build job script from statement.

    A bash script is written to a temporary file in
    ``PARAMS["workingdir"]``. Before running the statement, the script
    appends diagnostics (shell variables, loaded modules, hostname and
    /proc/meminfo) to ``shellfile``, each line prefixed with the job
    name, and restricts virtual memory via ``ulimit -v``.

    ``shellfile`` and ``ignore_pipe_errors`` are not parameters; they
    are taken from the enclosing scope.

    Arguments
    ---------
    statement : string
        Statement to run; passed through ``expandStatement``.
    job_memory : string
        Memory limit, converted with ``IOTools.human2bytes``.
    job_name : string
        Name used to prefix the diagnostic lines.

    returns (name_of_script, stdout_path, stderr_path)

    name_of_script is the absolute path of the script; the other two
    are that path with ``.stdout`` and ``.stderr`` appended.
    '''

    tmpfile = getTempFile(dir=PARAMS["workingdir"])

    try:
        # disabled: -l -O expand_aliases
        tmpfile.write("#!/bin/bash\n")
        tmpfile.write(
            'echo "%s : START -> %s" >> %s\n' %
            (job_name, tmpfile.name, shellfile))
        # Echoing the statement itself into shellfile is disabled
        # because of problems with quoting.
        tmpfile.write("set | sed 's/^/%s : /' &>> %s\n" %
                      (job_name, shellfile))
        # module list outputs to stderr, so merge stderr and stdout
        tmpfile.write("module list 2>&1 | sed 's/^/%s: /' &>> %s\n" %
                      (job_name, shellfile))
        tmpfile.write("hostname | sed 's/^/%s: /' &>> %s\n" %
                      (job_name, shellfile))
        tmpfile.write("cat /proc/meminfo | sed 's/^/%s: /' &>> %s\n" %
                      (job_name, shellfile))
        tmpfile.write(
            'echo "%s : END -> %s" >> %s\n' %
            (job_name, tmpfile.name, shellfile))

        # restrict virtual memory
        # Note that there are resources in SGE which could do this
        # directly such as v_hmem.
        # Note that limiting resident set sizes (RSS) with ulimit is not
        # possible in newer kernels.
        # bash's ``ulimit -v`` expects units of 1024 bytes, so convert
        # the byte count (rounding up) instead of passing it verbatim,
        # which would make the limit 1024 times too large.
        # NOTE(review): assumes IOTools.human2bytes returns bytes.
        memory_kb = (int(IOTools.human2bytes(job_memory)) + 1023) // 1024
        tmpfile.write("ulimit -v %i\n" % memory_kb)

        tmpfile.write(
            expandStatement(
                statement,
                ignore_pipe_errors=ignore_pipe_errors) + "\n")
    finally:
        # always release the file handle, even if a write failed.
        tmpfile.close()

    job_path = os.path.abspath(tmpfile.name)
    stdout_path = job_path + ".stdout"
    stderr_path = job_path + ".stderr"

    # make the script readable, writable and executable for user and group
    os.chmod(job_path, stat.S_IRWXG | stat.S_IRWXU)

    return (job_path, stdout_path, stderr_path)