def main():
    """
    Run the strategy parameter sweep on the parallel engines and save it.

    A direct view over all engines is created, the parameter grid
    (stock percentage x rebalancing period x days in) is built, the
    function returned by ``function_builder`` is mapped over every grid
    point, and the collected rows are written to ``data.csv`` below a
    header row.
    """
    # Direct view over every engine; dill is enabled before the builder
    # function is shipped to the engines.
    engines = Client()[:]
    engines.use_dill()
    engines.execute("import furnace.performance, furnace.strategy, numpy, datetime")

    # Date range handed to the function builder.
    begin = datetime.datetime(2003, 1, 2)
    end = datetime.datetime(2011, 12, 31)
    builder = function_builder(begin, end)

    # Axes of the parameter space; the grid is their cartesian product.
    stock_percents = numpy.linspace(0.0, 0.4, 10)
    rebalancing_periods = numpy.arange(1, 40, 1)
    days_in = numpy.arange(1, 250, 1)
    grid = list(itertools.product(stock_percents, rebalancing_periods, days_in))

    results = engines.map(builder, grid)

    header = ['days_out', 'pct', 'ndays', 'r2r', 'cagr', 'volatility', 'ntrades']
    #TODO: does pandas have a plain 'save to csv' function?
    with open('data.csv', 'wb') as csvfile:
        out = csv.writer(csvfile)
        out.writerow(header)
        out.writerows(results)
def init():
    """
    Initialise the module-level direct view on the 'mpi' profile.

    Creates a blocking direct view over all engines, runs the common
    imports and ``ndarray/interengine.py`` on them, then asks every
    engine for its MPI rank and pushes the resulting list to the engines
    under the name ``target2rank``.
    """
    global view
    view = Client(profile='mpi')[:]
    view.block = True

    # Statements executed on the engines before the helper script is run.
    engine_setup = (
        'from numpy import *',
        'from mpi4py import MPI',
        'import h5py as h5',
        'import os',
    )
    for statement in engine_setup:
        view.execute(statement)
    view.run('ndarray/interengine.py')

    # ``MPI`` is resolved on the engine side, where it was imported above.
    rank_query = interactive(lambda: MPI.COMM_WORLD.Get_rank())
    ranks = view.apply(rank_query)
    view['target2rank'] = ranks
def _wait_for_engines(n_engines):
    """Poll until a controller is reachable and ``n_engines`` engines are registered; return the client."""
    client = None
    while client is None:
        time.sleep(.1)
        try:
            client = Client()
        except Exception:
            # The controller is not accepting connections yet; keep polling.
            # Deliberately not a bare ``except`` so Ctrl-C can abort the wait.
            continue
    while len(client.ids) != n_engines:
        time.sleep(.1)
    return client


def _build_tasks(input_tab, base_path):
    """Group image paths by (gene, stage) into ``[gene, stage, [absolute paths]]`` entries."""
    tasks = []
    for gene in set(input_tab['gene']):
        # Filter once per gene instead of rescanning the whole table for
        # every (gene, stage) pair.
        gene_rows = input_tab[input_tab['gene'] == gene]
        for stage in set(gene_rows['stage']):
            images = gene_rows[gene_rows['stage'] == stage]['image']
            # Relative image names are resolved against the folder that
            # contains the gene/stage table.
            paths = [f if os.path.isabs(f)
                     else os.path.abspath(sep.join([base_path, '.', f]))
                     for f in images]
            tasks.append([gene, float(stage), paths])
    return tasks


def main():
    """
    Command-line entry point: select representative images per gene/stage.

    Parses the command line, optionally starts (or connects to) an
    IPython parallel cluster, builds one task per gene/stage group found
    in the tab-delimited input file, runs the tasks, then writes
    ``gs.dump`` and ``result.html`` into the result folder.

    When the parallel option is a number, a local ``ipcluster`` with that
    many engines is started and is always stopped again, even if a later
    step fails. Otherwise the option is passed to ``Client`` as given
    (per the option help: a path to ipcontroller-client.json) and the
    cluster is left running.
    """
    parser = argparse.ArgumentParser(
        description='Selects best stained representative images from gene/stage groups.')
    parser.add_argument('-g', '--genestages', metavar='file_name', type=str, nargs=1,
                        help='tab delimited file containing gene names, stages, and image file names')
    parser.add_argument('-r', '--result', metavar='folder_name', type=str, nargs=1,
                        help='path to the result folder')
    parser.add_argument('-c', '--cleared_model', metavar='file_name', type=str, nargs=1,
                        help='path to pickled model predicting if the image was cleared')
    parser.add_argument('-l', '--log', metavar='file_name', type=str, nargs=1,
                        help='path to log file')
    parser.add_argument('-s', '--noncluster_stage', dest='stage', metavar='stage', type=float,
                        default=[22], nargs=1,
                        help='The earliest stage which is not clustered on expression patterns. default: 22')
    if parallel:
        # The parallel options are only offered when the module-level
        # ``parallel`` flag is set.
        parser.add_argument('-p', '--parallel_images', metavar='N', type=str, nargs=1,
                            help='the number of threads or path to ipcontroller-client.json. Images in gene/stage groups are analysed in parallel with the gene/stage groups processed sequentially. Memory footprint is O(1) times bigger compared to single threaded run.')
        parser.add_argument('-j', '--parallel_genestages', metavar='N', type=str, nargs=1,
                            help='the number of threads or path to ipcontroller-client.json. Images in gene/stage groups are analysed sequentially with the gene/stage groups processed in parallel. Memory footprint is O(N) times bigger compared to single threaded run.')

    args = parser.parse_args()
    if not (args.genestages and args.result):
        print('Error: No action requested, please specify at least input files and output folder')
        # Parsing "--help" prints the usage text and exits.
        args = parser.parse_args(["--help"])

    # Every option uses nargs=1, so normalise missing ones to one-element
    # lists to keep the ``[0]`` indexing below uniform.
    if args.log is None:
        args.log = [None]
    if args.cleared_model is None:
        args.cleared_model = [None]
    if args.stage is None:
        args.stage = [22]

    compute = None
    run_jobs = run_jobs_sequentially
    stop_cluster = False
    try:
        if parallel and (args.parallel_images or args.parallel_genestages):
            if args.parallel_genestages:
                run_jobs = run_jobs_parallel
                N = args.parallel_genestages[0]
            else:
                N = args.parallel_images[0]

            if is_number(N):
                # Validate the engine count before spawning anything.
                n_engines = int(N)
                subprocess.Popen(["ipcluster", "start", "-n", N, "--quiet"])
                # From here on the cluster is ours to stop, whatever
                # happens while waiting for it or running the jobs.
                stop_cluster = True
                compute = _wait_for_engines(n_engines)[:]
            else:
                compute = Client(N)[:]

            # Make the engines work in the caller's directory and load
            # the package on them.
            compute.execute('import os;os.chdir("'+os.getcwd()+'")')
            compute.execute('import isimage')

        input_tab = pd.read_table(args.genestages[0], sep='\t', header=None,
                                  names=['gene', 'stage', 'image'])
        base_path = os.path.abspath(os.path.split(args.genestages[0])[0])
        tasks = _build_tasks(input_tab, base_path)

        gs = run_jobs(tasks, args.result[0], args.cleared_model[0], compute=compute,
                      nc_stage=args.stage[0], log=args.log[0])
    finally:
        if stop_cluster:
            subprocess.Popen(["ipcluster", "stop", "--quiet"])

    # NOTE(review): the file is opened in text mode as before; if ``dump``
    # is pickle.dump under Python 3 this needs 'wb' - confirm.
    with open(os.path.join(args.result[0], 'gs.dump'), 'w') as dump_file:
        dump(gs, dump_file)
    create_report(os.path.join(args.result[0], 'result.html'),
                  [g for g in gs if g is not None])