def update_products(self, message):
    def update_product_callback(update_data):
        product = Product.objects(
            external_id=update_data["external_id"]).first()
        if not product:
            logger.warn("Product %s not found in database" %
                        update_data["external_id"])
            return
        brand = Brand.objects(id=product.brand_id).first()
        ans = update_sku_price(product.product_id, product.external_id,
                               update_data["price"], brand.name)
        if ans.get("error"):
            logger.error(
                "Unable to update product(%s) price, message error: %s" %
                (product.external_id, ans.get("error_message")))
        else:
            logger.info("Product(%s) price updated" % (product.external_id, ))
        ans = update_sku_stocks(product.product_id, update_data["stocks"],
                                brand.name)
        if ans.get("error"):
            logger.error(
                "Unable to update product(%s) stocks, message error: %s" %
                (product.external_id, ans.get("error_message")))
        else:
            logger.info("Product(%s) stocks updated!" % (product.external_id, ))

    with ThreadingPool(nodes=4) as pool:
        pool.map(update_product_callback, parse_xml_products(message))
def generateMDPs(self):
    self.mdps = []
    results = []
    p = Pool(processes=(self.num_agents + 2))
    for i in xrange(0, self.num_agents):
        print "Generating MDP for Agent" + str(i)
        a = MDP(i, self.config)
        self.mdps.append(a)
    res = p.amap(self._instance_method_alias_call, self.mdps)
    self.mdps = res.get()
    sum = 0
    for m in self.mdps:
        sum += m.numberVariables
    print "Total Number of Variables: ", sum
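# A minimal, self-contained sketch of the amap-over-instances pattern used in
# generateMDPs above. Assumption: the real _instance_method_alias_call is not
# shown in that snippet, so the class, the dispatcher and their names below are
# illustrative stand-ins, not the original code.
from pathos.multiprocessing import ProcessingPool as Pool

class ToyMDP(object):
    def __init__(self, agent_id):
        self.agent_id = agent_id
        self.numberVariables = 0

    def build(self):
        # stand-in for the real MDP construction work
        self.numberVariables = 10 + self.agent_id
        return self

def call_build(mdp):
    # module-level dispatcher so the pool maps a plain function over instances
    return mdp.build()

if __name__ == '__main__':
    pool = Pool(4)
    mdps = [ToyMDP(i) for i in range(4)]
    mdps = pool.amap(call_build, mdps).get()  # non-blocking map, then collect
    print(sum(m.numberVariables for m in mdps))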
def create_training_data(self):
    # Delete existing data
    for f in os.listdir(self.training_data_path):
        os.remove(os.path.join(self.training_data_path, f))
    # Create data
    print("Creating training data...")
    _ = list(
        tqdm.tqdm(ThreadingPool().imap(self.handle_document,
                                       self.doc_set.document_set),
                  total=len(self.doc_set.document_set)))
def get_all_brands_products():
    brands = []
    for brand_name in AUTH_PARAMETERS.keys():
        brand = Brand.objects(name=brand_name).first()
        if not brand:
            brand = Brand(name=brand_name)
            brand.save()
        brands.append(brand)
    with ThreadingPool(nodes=4) as pool:
        pool.map(get_products_callback, brands)
def main():
    gn = GoogLeNet()
    y = []
    pool = Pool(3)
    for i in range(10):
        x = np.load('E:\imagenet\imgroup' + str(i) + '.npy')
        t = [x[_:(_ + 1)] for _ in np.arange(100)]
        print(i)
        y.append(pool.map(gn.predict, t))
    result = []
    for z in y:
        for zz in z:
            x = np.array(zz.data)
            for zzz in x:
                result.append(np.argsort(zzz)[-5:])
    print(np.array(result).shape)
    np.save('E:\ccc\\result\\gn_over_10', np.array(result, dtype=np.int32))
def main():
    v16 = vgg.VGG16Layers()
    #print([a])
    y = []
    pool = Pool(16)
    for i in range(10):
        #[np.load('/home/cjy/cjy/imnet/imgroup' + str(i) + '.npy').tolist() for i in range(4*i,4*i+4)])
        x = np.load('/home/cjy/cjy/imnet/imgroup' + str(i) + '.npy')
        t = [x[_:(_ + 1)] for _ in np.arange(100)]
        y.append(pool.map(v16.predict, t))
        #pool.close()
    result = []
    for z in y:
        for zz in z:
            x = np.array(zz.data)
            for zzz in x:
                result.append(zzz.argmax())
    print(np.array(result).shape)
    np.save('/home/cjy/cjy/result/vgg16', np.array(result, dtype=np.int32))
def get_products_callback(brand, page=1):
    """
    :param Brand brand:
    :param int page:
    :return:
    """
    ans = get_products(brand.name, page)
    if ans.get("error_code"):
        logger.error("failed to load brand - %s: page: %s, error message: %s" %
                     (brand.name, page, ans.get("error_message")))
        return
    print "getting products for brand: %s, page: %s ..." % (brand.name, page)
    res = ans["result"]
    products_list = res["aeop_a_e_product_display_d_t_o_list"][
        "item_display_dto"]
    with ThreadingPool(nodes=20) as pool:
        pool.map(
            lambda product_info: create_product_callback(brand, product_info),
            products_list)
    total_page = int(res["total_page"])
    if page != total_page:
        get_products_callback(brand, page + 1)
def main(specie):
    print("loading {} data...".format(specie))
    with lite.connect(conf.databases[specie]) as con:
        cursor = con.cursor()
        cursor.execute("SELECT DISTINCT transcript_id from aliases")
        result = cursor.fetchall()
    # TODO - remove boundary from names
    names = [value[0] for value in result]
    print("creating transcript database for {}".format(specie))
    # give this thing a progress bar
    global bar
    bar = progressbar.AnimatedProgressBar(end=len(names) + 1, width=10)
    pool = ThreadingPool(num_threads)
    assign_and_get_with_specie = partial(assign_and_get, specie)
    result = pool.amap(assign_and_get_with_specie, names)
    while True:
        if result.ready():
            break
        bar.show_progress()
        time.sleep(1)
    data = list(result.get())
    #print(data)
    # dark magic incoming
    # flatten the list
    # make it a list of tuples of id,index,domainlist
    # TODO Change to deal with new variant data coming from assign_and_get
    # variant has name,variant_index,domains,exons
    # TODO maybe no dark magic
    # TODO check that it actually runs
    '''
    the following dark magic is one line of this:
    for variants in data:
        if not variants:
            continue
        for variant in variants:
            if not variant:
                continue
            for exon in variant['exons']:
                if not exon:
                    continue
                pass
    '''
    data = [
        (
            variant['name'],
            '_'.join([variant['name'], str(variant['variant_index'])]),
            exon['transcript_id'],
            exon['index'],
            exon['relative_start'],
            exon['relative_end'],
            json.dumps(exon['domains_states']),
            json.dumps(exon['domains'])
        )
        for variants in data if variants != None
        for variant in variants if variant != None
        for exon in variant['exons'] if exon != None
    ]
    print('new_data: \n', data)
    # print("well that was fun, now exit")
    # sys.exit(2)
    write_to_db(data, specie)
    print()
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2014 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

pool = Pool()
tpool = TPool()

# pickle fails for nested functions
def adder(augend):
    zero = [0]
    def inner(addend):
        return addend + augend + zero[0]
    return inner

# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x: x**2

# test 'dilled' multiprocessing for inner
print "Evaluate 10 items on 2 proc:"
def trainModel(options, cap, edges, g_1, ConNN, DisNN, length,
               listofenvironments, T_min, T_max, train_mode, num_of_agents,
               num_of_zones, GOALS):
    uniqueGOALS = list(np.unique(GOALS))
    num_of_GOALS = len(uniqueGOALS)
    GOALS_index = {yy: xx for xx, yy in enumerate(uniqueGOALS)}
    with tf.Session(graph=g_1,
                    config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(
            '/home/jiajing/Project/REINFORCE/PythonScripts/tflogs/' +
            options.experimentname, sess.graph)
        # We will collect 32 trajectories/episodes per iteration
        N = options.N
        # Each trajectory will have at most 100 time steps
        T = options.T
        # Number of iterations
        n_itr = options.n_itr
        # Set the discount factor for the problem
        discount = options.discount
        # check whether the episode ends
        paths = []
        Collision = []
        # If you need to write the model, just write it once here; no need to do it in parallel multiple times.
        WriteModel = False
        if WriteModel:
            # Set the default brain to work with
            env = listofenvironments[0]
            default_brain = env.brain_names[0]
            brain = env.brains[default_brain]
            env_info = env.reset(train_mode=train_mode,
                                 config={"WriteModel#0F#1T": 1.0})[default_brain]
            env.close()
            raise Exception('Please set WriteModel to False now and run again.')
        pool = Pool(processes=len(listofenvironments) + 4)
        all_static_info = {}
        all_static_info["edges"] = edges
        all_static_info["ConNN"] = ConNN
        all_static_info["DisNN"] = DisNN
        all_static_info["length"] = length
        all_static_info["T_min"] = T_min
        all_static_info["T_max"] = T_max
        all_static_info["train_mode"] = train_mode
        all_static_info["num_of_agents"] = num_of_agents
        all_static_info["num_of_zones"] = num_of_zones
        all_static_info["GOALS"] = GOALS
        all_static_info["uniqueGOALS"] = uniqueGOALS
        all_static_info["num_of_GOALS"] = num_of_GOALS
        all_static_info["GOALS_index"] = GOALS_index
        all_static_info["cap"] = cap
        for i in range(0, n_itr):
            QMIXParameters = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope='qmix/eval')
            QMIXTargetParameters = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='qmix/target')
            MuParameters = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                             scope='Mu/eval')
            MuTargetParameters = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='Mu/target')
            soft_replacement = [
                tf.assign(t, 0.99 * t + 0.01 * e)
                for t, e in zip(QMIXTargetParameters, QMIXParameters)
            ]
            sess.run(soft_replacement)
            soft_replacement = [
                tf.assign(t, 0.99 * t + 0.01 * e)
                for t, e in zip(MuTargetParameters, MuParameters)
            ]
            sess.run(soft_replacement)
            # new iteration
            with open(options.experimentname + '.txt', 'a+') as f:
                f.write('Iteration' + str(i) + '\n')
            # did not consider the case that the last state is a terminal state
            all_samples_global_states = []
            all_samples_next_global_states = []
            all_samples_last_global_states = []
            all_samples_observations = []
            all_samples_next_observations = []
            all_samples_last_observations = []
            all_samples_dis_actions = []
            all_samples_next_dis_actions = []
            all_samples_last_dis_actions = []
            all_samples_con_actions = []
            all_samples_next_con_actions = []
            all_samples_last_con_actions = []
            all_rets = []
            all_last_rets = []
            all_samples_next_states = []
            eps_threshold = 0.05 + (0.9 - 0.05) * np.exp(-1. * i / 300)
            result = pool.amap(randomFunction, listofenvironments,
                               [eps_threshold] * len(listofenvironments),
                               [options] * len(listofenvironments),
                               [all_static_info] * len(listofenvironments),
                               [sess] * len(listofenvironments))
            all_dictionaries_all_episodes = result.get()
            for data_from_env in range(0, N):
                all_reach, total_len_eps, num_collision_eps, samples_states, samples_global_states, samples_observations, samples_dis_actions, samples_con_actions, rets = all_dictionaries_all_episodes[
                    data_from_env]
                all_samples_global_states.append(
                    samples_global_states[0:-2, :, :])
                all_samples_next_global_states.append(
                    samples_global_states[1:-1, :, :])
                all_samples_observations.append(
                    samples_observations[0:-2, :, :])
                all_samples_next_observations.append(
                    samples_observations[1:-1, :, :])
                all_samples_dis_actions.append(samples_dis_actions[0:-2, :, :])
                all_samples_next_dis_actions.append(
                    samples_dis_actions[1:-1, :, :])
                all_samples_con_actions.append(samples_con_actions[0:-2, :, :])
                all_samples_next_con_actions.append(
                    samples_con_actions[1:-1, :, :])
                all_rets.append(rets[0:-1])
                all_samples_next_states.append(samples_states[1:-1, :])
                if all_reach:
                    all_samples_last_global_states.append(
                        samples_global_states[-2:-1, :, :])
                    all_samples_last_observations.append(
                        samples_observations[-2:-1, :, :])
                    all_samples_last_dis_actions.append(
                        samples_dis_actions[-2:-1, :, :])
                    all_samples_last_con_actions.append(
                        samples_con_actions[-2:-1, :, :])
                    all_last_rets.append([rets[-1]])
                paths.append(total_len_eps)
                Collision.append(num_collision_eps)
            all_samples_global_states = np.concatenate(
                all_samples_global_states, axis=0)
            all_samples_next_global_states = np.concatenate(
                all_samples_next_global_states, axis=0)
            all_samples_observations = np.concatenate(
                all_samples_observations, axis=0)
            all_samples_next_observations = np.concatenate(
                all_samples_next_observations, axis=0)
            all_samples_dis_actions = np.concatenate(
                all_samples_dis_actions, axis=0)
            all_samples_next_dis_actions = np.concatenate(
                all_samples_next_dis_actions, axis=0)
            all_samples_con_actions = np.concatenate(
                all_samples_con_actions, axis=0)
            all_samples_next_con_actions = np.concatenate(
                all_samples_next_con_actions, axis=0)
            all_rets = np.concatenate(all_rets, axis=0)
            all_samples_next_states = np.concatenate(
                all_samples_next_states, axis=0)
            if len(all_last_rets) != 0:
                all_samples_last_global_states = np.concatenate(
                    all_samples_last_global_states, axis=0)
                all_samples_last_observations = np.concatenate(
                    all_samples_last_observations, axis=0)
                all_samples_last_dis_actions = np.concatenate(
                    all_samples_last_dis_actions, axis=0)
                all_samples_last_con_actions = np.concatenate(
                    all_samples_last_con_actions, axis=0)
                all_last_rets = np.concatenate(all_last_rets, axis=0)
            hybrid_actions = {}
            for nn in range(0, num_of_agents):
                hybrid_actions[nn] = [[], []]
            sample_len = all_samples_observations.shape[0]
            for all_act in range(4):
                dis_actions_input = np.asarray(
                    [to_one_hot(all_act, 4)] * num_of_agents *
                    sample_len).reshape((sample_len, num_of_agents, 4))
                con_actions = ConNN.getAction_target(
                    sess, all_samples_next_global_states,
                    all_samples_next_observations, dis_actions_input)
                con_actions_input = np.asarray(
                    [xx[:, 0] for xx in con_actions.values()]).T.reshape(
                        (sample_len, num_of_agents, 1))
                q_values_target = DisNN.get_q_values_target(
                    sess, all_samples_next_global_states,
                    all_samples_next_observations, dis_actions_input,
                    con_actions_input)
                for nn in range(0, num_of_agents):
                    hybrid_actions[nn][0].append(con_actions[nn][:, 0])
                    hybrid_actions[nn][1].append(q_values_target[nn][:, 0])
            for nn in range(0, num_of_agents):
                hybrid_actions[nn][0] = np.asarray(hybrid_actions[nn][0]).T
                hybrid_actions[nn][1] = np.asarray(hybrid_actions[nn][1]).T
            dis_actions_input = []
            con_actions_input = []
            for tt in range(sample_len):
                tmp_dic_actions = []
                tmp_con_actions = []
                for nn in range(0, num_of_agents):
                    if all_samples_next_con_actions[tt][nn][0] == -1:
                        a = -1
                        nu = -1
                    else:
                        a = np.argmax(
                            hybrid_actions[nn][1][tt]
                            [0:len(edges[all_samples_next_states[tt][nn]])])
                        nu = hybrid_actions[nn][0][tt][a]
                    tmp_dic_actions.append(to_one_hot(a, 4))
                    tmp_con_actions.append(nu)
                dis_actions_input.append(tmp_dic_actions)
                con_actions_input.append(tmp_con_actions)
            dis_actions_input = np.asarray(dis_actions_input).reshape(
                (sample_len, num_of_agents, 4))
            con_actions_input = np.asarray(con_actions_input).reshape(
                (sample_len, num_of_agents, 1))
            # con_actions_input = np.asarray([0]*sample_len*num_of_agents).reshape((sample_len, num_of_agents, 1))
            q_value_mix_next = DisNN.get_q_value_mix_target(
                sess, all_samples_next_global_states,
                all_samples_next_observations, dis_actions_input,
                con_actions_input)
            # q_value_mix_next = DisNN.get_q_value_mix_target(sess, all_samples_next_global_states, all_samples_next_observations, all_samples_next_dis_actions, all_samples_next_con_actions)
            # start training the critic
            inpdict = {}
            if len(all_last_rets) != 0:
                inpdict[DisNN.global_state] = np.concatenate([
                    all_samples_global_states, all_samples_last_global_states
                ], axis=0)
                inpdict[DisNN.obs] = np.concatenate(
                    [all_samples_observations, all_samples_last_observations],
                    axis=0)
                inpdict[DisNN.dis_actions] = np.concatenate(
                    [all_samples_dis_actions, all_samples_last_dis_actions],
                    axis=0)
                inpdict[DisNN.con_actions] = np.concatenate(
                    [all_samples_con_actions, all_samples_last_con_actions],
                    axis=0)
                inpdict[DisNN.r] = np.concatenate([
                    all_rets.reshape((len(all_rets), 1)),
                    all_last_rets.reshape((len(all_last_rets), 1))
                ], axis=0)
                inpdict[DisNN.q_value_mix_next] = np.concatenate([
                    q_value_mix_next,
                    np.asarray([0] * len(all_last_rets)).reshape(
                        (len(all_last_rets), 1))
                ], axis=0)
            else:
                inpdict[DisNN.global_state] = all_samples_global_states
                inpdict[DisNN.obs] = all_samples_observations
                inpdict[DisNN.dis_actions] = all_samples_dis_actions
                inpdict[DisNN.con_actions] = all_samples_con_actions
                inpdict[DisNN.r] = all_rets.reshape((len(all_rets), 1))
                inpdict[DisNN.q_value_mix_next] = q_value_mix_next
            sess.run(DisNN.learning_step, feed_dict=inpdict)
            loss1_after = sess.run(DisNN.loss, feed_dict=inpdict)
            # start training the actor
            gradients = DisNN.get_gradients(sess, all_samples_global_states,
                                            all_samples_observations,
                                            all_samples_dis_actions,
                                            all_samples_con_actions)[0]
            inpdicts = {}
            all_samples_global_states_single = {}
            all_samples_observations_single = {}
            all_samples_dis_actions_single = {}
            gradients_single = {}
            for nn in range(num_of_agents):
                inpdicts[nn] = {}
                all_samples_global_states_single[nn] = []
                all_samples_observations_single[nn] = []
                all_samples_dis_actions_single[nn] = []
                gradients_single[nn] = []
            sample_len = all_samples_con_actions.shape[0]
            for tt in range(sample_len):
                for nn in range(num_of_agents):
                    if all_samples_con_actions[tt][nn][0] != -1:
                        all_samples_global_states_single[nn].append(
                            all_samples_global_states[tt, :, :])
                        all_samples_observations_single[nn].append(
                            all_samples_observations[tt, :, :])
                        all_samples_dis_actions_single[nn].append(
                            all_samples_dis_actions[tt, :, :])
                        gradients_single[nn].append(gradients[tt, :, :])
            for nn in range(num_of_agents):
                inpdicts[nn][ConNN.global_state] = np.asarray(
                    all_samples_global_states_single[nn]).reshape(
                        (-1, num_of_agents, num_of_zones * 2 + T_max + 1))
                inpdicts[nn][ConNN.obs] = np.asarray(
                    all_samples_observations_single[nn]).reshape(
                        (-1, num_of_agents, num_of_zones))
                inpdicts[nn][ConNN.dis_actions] = np.asarray(
                    all_samples_dis_actions_single[nn]).reshape(
                        (-1, num_of_agents, 4))
                inpdicts[nn][ConNN.action_gradients] = np.asarray(
                    gradients_single[nn]).reshape((-1, num_of_agents, 1))
                sess.run(ConNN.learning_step[nn], feed_dict=inpdicts[nn])
            summary = tf.Summary()
            summary.value.add(tag='summaries/length_of_path',
                              simple_value=np.mean(paths[i * N:(i + 1) * N]))
            summary.value.add(tag='summaries/num_collision',
                              simple_value=np.mean(Collision[i * N:(i + 1) * N]))
            # summary.value.add(tag='summaries/actor_training_loss', simple_value = (loss2_before+loss2_after)/2)
            summary.value.add(tag='summaries/critic_training_loss',
                              simple_value=loss1_after)
            writer.add_summary(summary, i)
            writer.flush()
            # # # print(str(loss_before) + " " + str(loss_after) + " ")
        saver = tf.train.Saver()
        save_path = '/home/jiajing/Project/REINFORCE/PythonScripts/trainedModel/' + options.experimentname
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        saver.save(sess, save_path + '/model.ckpt')
    for env in listofenvironments:
        env.close()
#!/usr/bin/env python

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

pool = Pool()
tpool = TPool()

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())

print "Evaluate 10 items on 1 proc"
pool.ncpus = 1
res3 = pool.map(host, range(10))
print pool
print '\n'.join(res3)
print ''

print "Evaluate 10 items on 2 proc"
pool.ncpus = 2
res5 = pool.map(host, range(10))
print pool
print '\n'.join(res5)
print ''

print "Evaluate 10 items on ? proc"
pool.ncpus = None
res7 = pool.map(host, range(10))
print pool
print '\n'.join(res7)
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2014 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

pool = Pool()
tpool = TPool()

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())

print "Evaluate 10 items on 1 proc"
pool.ncpus = 1
res3 = pool.map(host, range(10))
print pool
print '\n'.join(res3)
print ''

print "Evaluate 10 items on 2 proc"
pool.ncpus = 2
res5 = pool.map(host, range(10))
print pool
print '\n'.join(res5)
print ''
def add_tracking(userslist):
    print(userslist)
    fdf = []
    with multiprocessing.Pool(processes=8) as pool:
        fdf = pool.map(track, userslist)
        pool.close()
        pool.join()
    return reduce(lambda x, y: pd.concat([x, y], axis=0), fdf)

start = time.time()
result = ThreadingPool().map(add_tracking, list(userslist))
'''
with multiprocessing.Pool(processes=8) as pool:
    result = pool.map(add_tracking, list(userslist))
'''
'''
fdf = []
for u in userslist:
    res = add_tracking(u)
    fdf.append(res)
'''
end = time.time()
minutes = int((end - start) / 60)
seconds = np.round((end - start) % 60, 2)
print("time taken: {} minutes and {} seconds".format(minutes, seconds))
parser = ArgumentParser(
    prog="Challenge #3",
    description="Print all combinations of the given string")
parser.add_argument("input_string", help="the string to be randomized")
parser.add_argument("--partition_size",
                    help="the size of partitions to be used",
                    type=int,
                    default=1)
parser.add_argument("--max_threads",
                    help="the max number of threads to use",
                    type=int,
                    default=1)

args = vars(parser.parse_args())

pool = Pool(args["max_threads"])

partitions = [
    args['input_string'][i:i + args['partition_size']]
    for i in range(0, len(args['input_string']), args['partition_size'])
]

partitionsm = pool.map(resolveVariables, partitions)

printCombined(pool, partitionsm)
'''
import sys
if sys.hexversion >= 0x2060000:
    import multiprocessing as mp
    from multiprocessing import cpu_count
    import multiprocessing.dummy as mpdummy
else:
    import processing as mp
    from processing import cpuCount as cpu_count
    import processing.dummy as mpdummy
'''
from pathos.multiprocessing import ProcessingPool, ThreadingPool, __STATE
from pathos.helpers import cpu_count
mp = ProcessingPool()
tp = ThreadingPool()

# backward compatibility
#FIXME: deprecated... and buggy! (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

Inputs:
    function  -- target function
    sequence  -- sequence to process in parallel

Additional Inputs:
    nproc     -- number of 'local' cpus to use  [default = 'autodetect']
    type      -- processing type ['blocking', 'non-blocking', 'unordered']
    threads   -- if True, use threading instead of multiprocessing
    '''
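# A minimal usage sketch for the helper documented above. Assumptions: the
# commented mp_map call is based only on its docstring (the function body is
# not shown here, and the FIXME marks it as deprecated); the live lines use the
# standard pathos ProcessingPool / ThreadingPool API imported above.
def _square(x):
    return x * x

if __name__ == '__main__':
    # legacy helper, per its docstring: a blocking map on 2 local cpus
    # results = mp_map(_square, range(10), nproc=2, type='blocking')

    # current equivalents using the pool classes imported above
    results_p = ProcessingPool(nodes=2).map(_square, range(10))  # processes
    results_t = ThreadingPool(nodes=2).map(_square, range(10))   # threads
    print(results_p, results_t)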
if args['total_value'] < 0:
    raise Exception('total_value must be positive')
if args['gifts'] <= 0:
    raise Exception('gifts must be positive')
if args['partitions'] < 1:
    raise Exception('must have at least 1 partition to ensure completion')

items = open(args['file_path'], 'rb').readlines()
indexes = range(len(items))
indexed_items = list(zip(indexes, items))

# Setup parallelism pool
pool = Pool(int(pow(len(indexed_items), args['gifts'])))

items_list = pool.map(parse_line, indexed_items)

results = subset_sum(total_money=args['total_value'],
                     all_items=items_list,
                     total_gifts=args['gifts'],
                     partitions=args['partitions'],
                     thread_pool=pool)
results.reverse()

if len(results) == 0:
    print("Not Possible")
else:
    print(
        reduce(
            lambda a, b: a + ", " + b,
            map(lambda item: "%s %d" % (item['item'], item['price']),
                results)))
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2014 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

pool = Pool()
tpool = TPool()

# pickle fails for nested functions
def adder(augend):
    zero = [0]
    def inner(addend):
        return addend + augend + zero[0]
    return inner

# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x: x**2

# test 'dilled' multiprocessing for inner
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2
print pool
print pool.map(add_me, range(10))
print ''
def splitted_conv2d(batch: torch.Tensor, weights: torch.Tensor,
                    padding: int) -> torch.Tensor:
    with ThreadingPool(4) as p:
        results = p.map(
            lambda el: F.conv2d(el[0].unsqueeze(0), el[1], padding=padding),
            zip(batch, weights))
    return torch.cat(results, dim=0)
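# A minimal usage sketch for splitted_conv2d above. Assumptions: its definition
# (and its ThreadingPool / torch.nn.functional imports) are in scope, `batch`
# is (N, C_in, H, W) and `weights` holds one filter bank per sample, i.e.
# (N, C_out, C_in, kH, kW); the shapes below are illustrative only.
import torch

batch = torch.randn(8, 3, 32, 32)       # 8 samples, 3 channels
weights = torch.randn(8, 16, 3, 3, 3)   # a separate 16-filter bank per sample
out = splitted_conv2d(batch, weights, padding=1)
print(out.shape)  # expected: torch.Size([8, 16, 32, 32])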
def trainModel(options, cap, edges, g_1, polNNs, length, listofenvironments,
               T_min, T_max, train_mode, num_of_agents, num_of_zones, GOALS):
    uniqueGOALS = list(np.unique(GOALS))
    num_of_GOALS = len(uniqueGOALS)
    GOALS_index = {yy: xx for xx, yy in enumerate(uniqueGOALS)}
    with tf.Session(graph=g_1,
                    config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(
            '/home/jiajing/Project/REINFORCE/PythonScripts/tflogs/' +
            options.experimentname, sess.graph)
        # We will collect 32 trajectories/episodes per iteration
        N = options.N
        # Each trajectory will have at most 100 time steps
        T = options.T
        # Number of iterations
        n_itr = options.n_itr
        # Set the discount factor for the problem
        discount = options.discount
        # check whether the episode ends
        paths = []
        Collision = []
        # If you need to write the model, just write it once here; no need to do it in parallel multiple times.
        WriteModel = False
        if WriteModel:
            # Set the default brain to work with
            env = listofenvironments[0]
            default_brain = env.brain_names[0]
            brain = env.brains[default_brain]
            env_info = env.reset(train_mode=train_mode,
                                 config={"WriteModel#0F#1T": 1.0})[default_brain]
            env.close()
            raise Exception('Please set WriteModel to False now and run again.')
        pool = Pool(processes=len(listofenvironments) + 4)
        all_static_info = {}
        all_static_info["edges"] = edges
        all_static_info["polNNs"] = polNNs
        all_static_info["length"] = length
        all_static_info["T_min"] = T_min
        all_static_info["T_max"] = T_max
        all_static_info["train_mode"] = train_mode
        all_static_info["num_of_agents"] = num_of_agents
        all_static_info["num_of_zones"] = num_of_zones
        all_static_info["GOALS"] = GOALS
        all_static_info["uniqueGOALS"] = uniqueGOALS
        all_static_info["num_of_GOALS"] = num_of_GOALS
        all_static_info["GOALS_index"] = GOALS_index
        all_static_info["cap"] = cap
        for i in range(0, n_itr):
            # new iteration
            with open(options.experimentname + '.txt', 'a+') as f:
                f.write('Iteration' + str(i) + '\n')
            obsdict = {}
            actdict = {}
            timeTodestdict = {}
            retdict = {}
            nextretdict = {}
            result = pool.amap(randomFunction, listofenvironments,
                               [options] * len(listofenvironments),
                               [all_static_info] * len(listofenvironments),
                               [sess] * len(listofenvironments))
            all_dictionaries_all_episodes = result.get()
            for data_from_env in range(0, N):
                total_len_eps, num_collision_eps, obsdict_eps, actdict_eps, timeTodestdict_eps, retdict_eps, nextretdict_eps = all_dictionaries_all_episodes[
                    data_from_env]
                for x in range(0, num_of_zones):
                    if x in obsdict_eps:
                        if x in obsdict:
                            obsdict[x].extend(obsdict_eps[x])
                            actdict[x].extend(actdict_eps[x])
                            timeTodestdict[x].extend(timeTodestdict_eps[x])
                            retdict[x].extend(retdict_eps[x])
                            nextretdict[x].extend(nextretdict_eps[x])
                        else:
                            obsdict[x] = []
                            actdict[x] = []
                            timeTodestdict[x] = []
                            retdict[x] = []
                            nextretdict[x] = []
                            obsdict[x].extend(obsdict_eps[x])
                            actdict[x].extend(actdict_eps[x])
                            timeTodestdict[x].extend(timeTodestdict_eps[x])
                            retdict[x].extend(retdict_eps[x])
                            nextretdict[x].extend(nextretdict_eps[x])
                paths.append(total_len_eps)
                Collision.append(num_collision_eps)
            # iteration ends, start training the actor network
            loss2_before = 0
            loss2_after = 0
            for x in range(0, num_of_zones):
                inpdict = {}
                if x in obsdict:
                    inpdict[polNNs[x].input_var] = np.asarray(
                        obsdict[x]).reshape(
                            (len(obsdict[x]), num_of_zones + num_of_agents))
                    inpdict[polNNs[x].input_var1] = np.asarray(
                        actdict[x]).reshape(
                            (len(actdict[x]), num_of_GOALS, len(edges[x])))
                    inpdict[polNNs[x].act_var] = np.asarray(
                        actdict[x]).reshape(
                            (len(actdict[x]), num_of_GOALS, len(edges[x])))
                    inpdict[polNNs[x].ret_var] = np.asarray(
                        retdict[x]).reshape((1, len(retdict[x])))
                    inpdict[polNNs[x].nextret_var] = np.asarray(
                        nextretdict[x]).reshape((1, len(nextretdict[x])))
                    inpdict[polNNs[x].phi] = np.asarray(
                        timeTodestdict[x]).reshape((1, len(timeTodestdict[x])))
                    # learning rate decay
                    # polNNs[x].learning_rate = options.lr * 0.96**(i/5)
                    loss2_before += sess.run(polNNs[x].loss, feed_dict=inpdict)
                    sess.run(polNNs[x].learning_step, feed_dict=inpdict)
                    loss2_after += sess.run(polNNs[x].loss, feed_dict=inpdict)
            summary = tf.Summary()
            summary.value.add(tag='summaries/length_of_path',
                              simple_value=np.mean(paths[i * N:(i + 1) * N]))
            summary.value.add(tag='summaries/num_collision',
                              simple_value=np.mean(Collision[i * N:(i + 1) * N]))
            summary.value.add(tag='summaries/actor_training_loss',
                              simple_value=(loss2_before + loss2_after) / 2)
            writer.add_summary(summary, i)
            writer.flush()
        saver = tf.train.Saver()
        save_path = '/home/jiajing/Project/REINFORCE/PythonScripts/trainedModel/' + options.experimentname
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        saver.save(sess, save_path + '/model.ckpt')
    for env in listofenvironments:
        env.close()
def threadcompute(self, xs):
    pool = ThreadingPool(4)
    results = pool.map(self.compute, xs)
    return results
def evaluate(fitness: callable, population: np.ndarray, pool_size: int):
    pool = Pool(pool_size)
    evaluated_population = pool.map(fitness, population)
    evaluated_population.sort(key=lambda x: x[1])
    return np.array(evaluated_population, dtype=object)
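# A minimal usage sketch for evaluate above. Assumptions: the evaluate()
# definition and its Pool import are in scope, and `fitness` returns an
# (individual, score) pair -- that is what the `key=lambda x: x[1]` sort
# implies; the toy fitness and population below are illustrative only.
import numpy as np

def sphere_fitness(individual):
    # score a candidate by the sum of squares of its genes (lower is better)
    return individual, float(np.sum(individual ** 2))

if __name__ == '__main__':
    population = np.random.uniform(-1.0, 1.0, size=(20, 5))  # 20 candidates, 5 genes
    ranked = evaluate(sphere_fitness, population, pool_size=4)
    best_individual, best_score = ranked[0]
    print(best_score)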
def initializeRL(self, filename):
    f = open(self.config.workDir + 'Logs/' + filename + '.csv', 'w', 0)
    f.write(
        'Iteration,AvgMDPReward,AvgEventReward,AvgSystemReward,KMDPReward,KEventReward,KSystemReward,Time\n'
    )
    b = open(self.config.workDir + 'Logs/Baseline_' + filename + '.csv', 'w', 0)
    b.write('Iteration,')
    for j in xrange(0, self.num_agents):
        b.write('Agent_' + str(j) + '_Initial Loss,' + 'Agent_' + str(j) +
                '_Final Loss, ,')
    b.write('\n')
    pol = open(self.config.workDir + 'Logs/Policy_' + filename + '.csv', 'w', 0)
    pol.write('Iteration,')
    for j in xrange(0, self.num_agents):
        pol.write('Agent_' + str(j) + '_Initial Loss,' + 'Agent_' + str(j) +
                  '_Final Loss, ,')
    pol.write('\n')
    trains = []
    event_maps = []
    test_maps = []
    files = []
    placeholders = []
    for i in xrange(0, self.num_agents):
        train_instance = Train(i, self.config)
        event_maps.append(train_instance.eventMap)
        trains.append(train_instance)
    g_1 = tf.Graph()
    with g_1.as_default():
        for i in xrange(0, self.num_agents):
            with tf.device('/device:CPU:1' + str(i)):
                pl = PlaceholderClass(agent=i, c=self.config, t=trains[i])
                pl = pl.computationalGraphs()
                trains[i].policy = pl.policy
                trains[i].baseline = pl.baseline
                assert pl.t.policy != None
                assert pl.t.baseline != None
                placeholders.append(pl)
    with tf.Session(graph=g_1,
                    config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if self.config.runWithSavedModel:
            saver.restore(sess, "../models/model_" + filename + ".ckpt")
            print("Model restored.")
        avg_arr = []
        ev_avg_arr = []
        mdp_avg_arr = []
        p = Pool(processes=(self.num_agents + 2))
        stopwatch = Stopwatch()
        orig_base_loss = [
            placeholders[j].loss_baseline for j in xrange(0, self.num_agents)
        ]
        learn_baseline = [
            placeholders[j].learning_step_baseline
            for j in xrange(0, self.num_agents)
        ]
        new_base_loss = [
            placeholders[j].loss_baseline for j in xrange(0, self.num_agents)
        ]
        orig_policy_loss = [
            placeholders[j].loss for j in xrange(0, self.num_agents)
        ]
        learn_policy = [
            placeholders[j].learning_step for j in xrange(0, self.num_agents)
        ]
        new_policy_loss = [
            placeholders[j].loss for j in xrange(0, self.num_agents)
        ]
        writer = tf.summary.FileWriter('../tflogs/' + filename, sess.graph)
        for curr_iter in xrange(1, self.config.numIterations + 1):
            # print "---------Current Iteration----------: ", curr_iter
            event_maps = []
            test_maps = []
            res = p.amap(self._instance_method_alias_run_iteration, trains,
                         [sess] * self.num_agents)
            trains = res.get()
            for j in xrange(0, self.num_agents):
                event_maps.append(trains[j].eventMap)
            res = p.amap(self._instance_method_alias_update_eventArrays,
                         trains, [event_maps] * self.num_agents)
            trains = res.get()
            feeding_dicts = []
            baseline_dicts = []
            test_maps = []
            for j in xrange(0, self.num_agents):
                feeding_d, baseline_d, testMap_d = trains[j].giveMeEverything(
                    placeholders[j])
                feeding_dicts.append(feeding_d)
                baseline_dicts.append(baseline_d)
                test_maps.append(testMap_d)
            assert self.num_agents >= 2
            merged_dicts_baseline = self.merge_two_dicts(
                baseline_dicts[0], baseline_dicts[1])
            merged_dicts_feeding = self.merge_two_dicts(
                feeding_dicts[0], feeding_dicts[1])
            for j in xrange(2, self.num_agents):
                merged_dicts_baseline = self.merge_two_dicts(
                    merged_dicts_baseline, baseline_dicts[j])
                merged_dicts_feeding = self.merge_two_dicts(
                    merged_dicts_feeding, feeding_dicts[j])
            #-----------------------
            # Baseline training
            #-----------------------
            for j in xrange(0, self.num_agents):
                # Set training mode to False for evaluating loss.
                merged_dicts_baseline[
                    placeholders[j].train_mode_baseline] = False
            # print "Original Baseline Loss: ",
            orig_base_loss_vals = sess.run(orig_base_loss,
                                           feed_dict=merged_dicts_baseline)
            # print orig_base_loss_vals
            for j in xrange(0, self.num_agents):
                # Set training mode to True for learning step.
                merged_dicts_baseline[
                    placeholders[j].train_mode_baseline] = True
            sess.run(learn_baseline, feed_dict=merged_dicts_baseline)
            for j in xrange(0, self.num_agents):
                # Set training mode to False for evaluating loss after training.
                merged_dicts_baseline[
                    placeholders[j].train_mode_baseline] = False
            # print "New Baseline Loss: ",
            new_base_loss_vals = sess.run(new_base_loss,
                                          feed_dict=merged_dicts_baseline)
            # print new_base_loss_vals
            #-----------------------
            # Policy training
            #-----------------------
            for j in xrange(0, self.num_agents):
                # Set training mode to False for evaluating loss.
                merged_dicts_feeding[placeholders[j].policy.train_mode] = False
            orig_policy_loss_vals = sess.run(orig_policy_loss,
                                             feed_dict=merged_dicts_feeding)
            # print "Original Policy Loss: ", orig_policy_loss_vals
            for j in xrange(0, self.num_agents):
                # Set training mode to True for learning step.
                merged_dicts_feeding[placeholders[j].policy.train_mode] = True
            sess.run(learn_policy, feed_dict=merged_dicts_feeding)
            for j in xrange(0, self.num_agents):
                # Set training mode to False for evaluating loss after training.
                merged_dicts_feeding[placeholders[j].policy.train_mode] = False
            new_policy_loss_vals = sess.run(new_policy_loss,
                                            feed_dict=merged_dicts_feeding)
            # print "New Policy Loss: ", new_policy_loss_vals
            mdp_avg_val, event_avg_val, system_avg_val, all_mdp_values = self.getAvgRewardFromEvents(
                test_maps)
            # print '\tIT:', curr_iter, ' Individual Average MDP Return:', all_mdp_values
            # print '\tIT:', curr_iter, ' Average MDP Return:', mdp_avg_val
            # print '\tIT:', curr_iter, ' Average Event Return:', event_avg_val
            # print '\tIT:', curr_iter, ' Average Return:', system_avg_val
            mdp_avg_arr.append(mdp_avg_val)
            mdp_avg_k_val = np.mean(np.array(mdp_avg_arr[-100:]))
            # print '\tLast K iter MDP avg ', mdp_avg_k_val, '\n'
            ev_avg_arr.append(event_avg_val)
            ev_avg_k_val = np.mean(np.array(ev_avg_arr[-100:]))
            # print '\tLast K iter Event avg ', ev_avg_k_val, '\n'
            avg_arr.append(system_avg_val)
            avg_arr_k_val = np.mean(np.array(avg_arr[-100:]))
            # print '\tLast K iter System avg ', avg_arr_k_val, '\n'
            elapTime = stopwatch.elapsedTime()
            # print '\tElapsed Time: ', elapTime
            avg_time_per_iter = elapTime / curr_iter
            # print '\tAverage Time per Iteration: ', avg_time_per_iter
            if curr_iter % self.config.savingThreshold == 0:
                save_path = saver.save(
                    sess, "../models/model_" + filename + ".ckpt")
                print("Model saved in path: %s" % save_path)
            if curr_iter >= self.config.loggingThreshold:
                summary = tf.Summary()
                summary.value.add(tag='Average/mdp_avg',
                                  simple_value=mdp_avg_val)
                summary.value.add(tag='Average/event_avg',
                                  simple_value=event_avg_val)
                summary.value.add(tag='Average/system_avg',
                                  simple_value=system_avg_val)
                summary.value.add(tag='KAverage/mdp_avg_k',
                                  simple_value=mdp_avg_k_val)
                summary.value.add(tag='KAverage/event_avg_k',
                                  simple_value=ev_avg_k_val)
                summary.value.add(tag='KAverage/system_avg_k',
                                  simple_value=avg_arr_k_val)
                summary.value.add(tag='Timing/elapsed_time_var',
                                  simple_value=elapTime)
                summary.value.add(tag='Timing/avg_time_per_iteration',
                                  simple_value=avg_time_per_iter)
                writer.add_summary(summary, curr_iter)
                writer.flush()
                f.write(
                    str(curr_iter) + ',' + str(mdp_avg_val) + ',' +
                    str(event_avg_val) + ',' + str(system_avg_val) + ',')
                f.write(str(mdp_avg_k_val) + ',')
                f.write(str(ev_avg_k_val) + ',')
                f.write(str(avg_arr_k_val) + ',')
                f.write(str(elapTime) + '\n')
                b.write(str(curr_iter) + ",")
                pol.write(str(curr_iter) + ",")
                for j in xrange(0, self.num_agents):
                    b.write(
                        str(orig_base_loss_vals[j]) + "," +
                        str(new_base_loss_vals[j]) + ", ,")
                    pol.write(
                        str(orig_policy_loss_vals[j]) + "," +
                        str(new_policy_loss_vals[j]) + ", ,")
                b.write("\n")
                pol.write("\n")
        p.close()
    f.close()
    b.close()
    pol.close()