def make_monte_carlo_batch(self, nb_episodes, render, policy, weights_flag=False, weights=None):
    """
    Create a batch of episodes with a given policy
    Used in Monte Carlo approaches
    :param nb_episodes: the number of episodes in the batch
    :param render: whether the episode is displayed or not (True or False)
    :param policy: the policy controlling the agent
    :param weights_flag: whether the batch should be initialized with the given weights
    :param weights: the weights used to initialize the batch when weights_flag is True
    :return: the resulting batch of episodes
    """
    if weights_flag:
        batch = Batch(weights)
    else:
        batch = Batch()
    self.env.set_reward_flag(False)
    self.env.set_duration_flag(False)
    for e in range(nb_episodes):
        episode = self.train_on_one_episode(policy, False, render)
        batch.add_episode(episode)
    return batch
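# Hedged usage sketch (not from the source): assuming this method lives on a
# simulation object `simu` and `policy` is an already-constructed policy, a
# Monte Carlo batch could be collected and reused like this:
#
#   batch = simu.make_monte_carlo_batch(nb_episodes=50, render=False, policy=policy)
#   batch.train_policy_through_regress(policy)  # Batch method used by regress() below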
def get_decode_data(hps, vocab, data_path, randomize=False):
    tf.logging.info('Fetching data..')
    filelist = glob.glob(data_path)
    inputs = []
    total_examples = 0
    total_batches = 0
    for f in filelist:
        with open(f, 'rb') as reader:
            while True:
                len_bytes = reader.read(8)
                if not len_bytes:
                    break
                str_len = struct.unpack('q', len_bytes)[0]
                example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
                e = example_pb2.Example.FromString(example_str)
                try:
                    article_text = e.features.feature['article'].bytes_list.value[0].decode()
                    if len(article_text) == 0:
                        # Skip examples with empty article text.
                        continue
                    abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode()
                    abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract_text)]
                    example = Example(article_text, abstract_sentences, vocab, hps)
                    inputs.append(example)
                    total_examples += 1
                except ValueError:
                    # Failed to get article or abstract from example; skip it.
                    continue
    batches = []
    tf.logging.info('Creating batches..')
    if randomize:
        # Pick one random example and repeat it beam_size times to form a single batch.
        random.shuffle(inputs)
        example = inputs[0]
        b = [example for _ in range(hps.beam_size)]
        batches.append(Batch(b, hps, vocab))
        total_batches = 1
        total_examples = 1
    else:
        # One batch per example, each repeated beam_size times for beam search.
        for i in range(len(inputs)):
            b = [inputs[i] for _ in range(hps.beam_size)]
            batches.append(Batch(b, hps, vocab))
            total_batches += 1
    tf.logging.info('[TOTAL Batches] : %i', total_batches)
    tf.logging.info('[TOTAL Examples] : %i', total_examples)
    tf.logging.info('Creating batches..COMPLETE')
    return batches
def clusterize(path, algo):
    attacks = list(read_csv(path))
    cluster_builder = ClusterBuilder(nitems=len(attacks))
    batch = Batch()
    addr_to_index = {}
    for index, attack in enumerate(attacks):
        batch.feed(attack)
        addr = attack.source_addr
        try:
            prev_index = addr_to_index[addr]
        except KeyError:
            pass
        else:
            cluster_builder.merge(prev_index, index)
        addr_to_index[addr] = index
    db = algo.fit([batch.features(attack) for attack in attacks])
    specimen = [-1 for _ in range(len(set(db.labels_)))]
    for index, attack in enumerate(attacks):
        cluster = db.labels_[index]
        if cluster == -1:
            cluster_builder.mark_as_noise(index)
            continue
        if specimen[cluster] != -1:
            cluster_builder.merge(specimen[cluster], index)
        specimen[cluster] = index
    return Clusters(attacks=attacks, classes=cluster_builder.finalize())
def post(self):
    """
    Execute GraphQL queries and mutations
    Use this endpoint to send http request to the GraphQL API.
    """
    payload = request.json

    # Execute request on GraphQL API
    status, data = utils.execute_graphql_request(payload['query'])

    # Execute batch of indicators
    if status == 200 and 'executeBatch' in payload['query']:
        if 'id' in data['data']['executeBatch']['batch']:
            batch_id = str(data['data']['executeBatch']['batch']['id'])
            batch = Batch()
            batch.execute(batch_id)
        else:
            message = "Batch Id attribute is mandatory in the payload to be able to trigger the batch execution. Example: {'query': 'mutation{executeBatch(input:{indicatorGroupId:1}){batch{id}}}'}"
            abort(400, message)

    # Test connectivity to a data source
    if status == 200 and 'testDataSource' in payload['query']:
        if 'id' in data['data']['testDataSource']['dataSource']:
            data_source_id = str(data['data']['testDataSource']['dataSource']['id'])
            data_source = DataSource()
            data = data_source.test(data_source_id)
        else:
            message = "Data Source Id attribute is mandatory in the payload to be able to test the connectivity. Example: {'query': 'mutation{testDataSource(input:{dataSourceId:1}){dataSource{id}}}'}"
            abort(400, message)

    if status == 200:
        return jsonify(data)
    else:
        abort(500, data)
def test_batch_job_spawn(self):
    self.os_mock.listdir = mock.MagicMock()
    self.os_mock.listdir.return_value = ['job-dir1', 'job-dir2']
    from batch_midwife import BatchMidwife
    from batch import Batch
    midwife = BatchMidwife()
    midwife.apprentice = mock.MagicMock()
    midwife.client = mock.MagicMock()
    midwife.batch_pub_sub = mock.MagicMock()
    midwife.batch_pub_sub.listen.return_value = [{'data': 'batch-lovelyhashcode'}]
    midwife.client.exists.return_value = True
    batch = Batch('uploaded')
    midwife.client.get.return_value = pickle.dumps(batch)
    midwife.client.set = mock.MagicMock()
    midwife.client.publish = mock.MagicMock()
    midwife.run()
    assert midwife.client.exists.call_count == 1
    assert midwife.client.get.call_count == 1
    assert midwife.client.set.call_count == 4
    assert midwife.client.publish.call_count == 2
    assert midwife.client.set.call_args_list[1][0][0] == 'job-dir1_1'
    assert midwife.client.set.call_args_list[2][0][0] == 'job-dir2_1'
    assert pickle.loads(midwife.client.set.call_args_list[3][0][1]).state == 'running'
    assert self.os_mock.listdir.call_count == 1
def driver_listener(transaction_queue):
    start = time()
    driver = Driver()
    i = 0
    while True:
        batch_file = transaction_queue.get()
        batch = Batch()
        batch.load(batch_file)
        for transaction in batch.items:
            try:
                added = driver.run(transaction)
                duration = time() - start
                total = len(driver.hset) + len(driver.lset)
                print('Driver rate: {} of {} ({}|{})\r'.format(
                    round(total / duration, 3), total, len(driver.hset), len(driver.lset)),
                    flush=True, end='')
                if added:
                    i += 1
            except KeyboardInterrupt:
                raise
            except neobolt.exceptions.CypherSyntaxError:
                pass
            except Exception as e:
                print(e, flush=True)
                print(transaction.in_label, flush=True)
                print(transaction.out_label, flush=True)
                print(transaction.uuid, flush=True)
                print(transaction.from_uuid, flush=True)
                print(transaction.data, flush=True)
def getNext(self):
    "iterator"
    batchRange = range(self.currIdx, self.currIdx + config.BATCH_SIZE)
    gtTexts = [self.samples[i].gtText for i in batchRange]
    imgs = []
    for i in batchRange:
        try:
            self.binaryImageFile.seek(self.samples[i].imageStartPosition)
            img = np.frombuffer(self.binaryImageFile.read(self.samples[i].imageSize), np.dtype('B'))
            img = img.reshape(self.samples[i].imageHeight, self.samples[i].imageWidth)
            img = preprocess(img, config.IMAGE_WIDTH, config.IMAGE_HEIGHT, config.RESIZE_IMAGE,
                             config.CONVERT_IMAGE_TO_MONOCHROME, config.AUGMENT_IMAGE)
            imgs.append(img)
        except IOError as e:
            print("I/O error({0}): {1}".format(e.errno, e.strerror))
        except ValueError as e:
            # ValueError has no errno/strerror attributes; print the exception itself.
            print("Value error: {0}".format(e))
        except:
            print("Unexpected error:", sys.exc_info()[0])
    self.currIdx += config.BATCH_SIZE
    return Batch(gtTexts, imgs)
def get_experience(self, insert_dummy=True):
    batch = Batch(dummy=[])
    for i in range(self.env_num):
        # append to total batch
        batch.append(self.buffers[i])
        # set dummy key
        if insert_dummy:
            length = len(self.buffers[i])
            batch.dummy += [False] * length
            # append dummy item at the end if not done
            if length:
                if not batch[-1].done:
                    dummy_item = batch[-1:]
                    dummy_item.dummy = [True]
                    dummy_item.done = [True]
                    dummy_item.state = dummy_item.next_state
                    batch.append(dummy_item)
    return batch.to_numpy()
def reset(self):
    # ------------------------------------------------------------
    # failures arrive according to the configured failure type:
    # 0 = Poisson, 1 = Exponential, 2 = Batch (cascading failures)
    # ------------------------------------------------------------
    if self.failure_type == 0:
        trace = Poisson(self.sys.num_disks, self.failure_percent, self.mtbf)
    if self.failure_type == 1:
        trace = Exponential(self.sys.num_disks, self.failure_percent, self.mtbf)
    if self.failure_type == 2:
        trace = Batch(self.sys.num_disks, self.failure_percent, self.mtbf, cascade_factor=10.0)
    self.trace_entry = trace.generate_failures()

    # ------------------------------------------
    # put the disk failures in the event queue
    # ------------------------------------------
    self.events_queue = []
    for disk_fail_time, diskId in self.trace_entry:
        heappush(self.events_queue, (disk_fail_time, Disk.EVENT_FAIL, diskId))
        print(">>>>> reset disk", diskId, Disk.EVENT_FAIL, "@", disk_fail_time)
        self.mission_time = disk_fail_time
    print(" - system mission time - ", self.mission_time)

    # ------------------------------
    # initialize the system state
    # ------------------------------
    self.state = State(self.sys, self.rebuild, self.copyback, self.events_queue)
def load_keyboard_consensus(self, speaker_id):
    result = []
    data_root = os.path.join(os.path.dirname(__file__), "../analysis/data/")
    batch = Batch()
    batch.load(data_root)
    batch._getDataOfVideo(speaker_id, result)
    return result
def run(self):
    with open(self.train_data, 'rb') as f:
        data = pickle.load(f)
    with open(self.train_labels, 'rb') as f:
        labels = pickle.load(f)
    labels = one_hot(sorted(list(set(labels))), labels)
    with open(self.test_data, 'rb') as f:
        data_t = pickle.load(f)
    with open(self.test_labels, 'rb') as f:
        labels_t = pickle.load(f)
    labels_t = one_hot(sorted(list(set(labels_t))), labels_t)

    b = Batch(data, labels, Params.batch_size)

    var = tf.trainable_variables()
    conv = [v for v in var if v.name.startswith("conv")]
    fool = [v for v in var if v.name.startswith("fooling")]
    fc = [v for v in var if v.name.startswith("fc")]
    smax = [v for v in var if v.name.startswith("soft_max")]

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.output))
    optimiser = tf.train.AdamOptimizer(Params.learning_rate).minimize(
        cross_entropy, var_list=conv + fc + smax)

    # collect prediction in the batch
    correct_prediction = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.output, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    total_batch = int(len(data) / Params.batch_size)
    learning = []
    if self.trainable:
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(Params.epoch):
                b.shuffle()
                avg_cost = 0
                print("{} epoch".format(epoch))
                for i in range(total_batch):
                    batch_x, batch_y = b.next_batch()
                    _, cost = sess.run([optimiser, cross_entropy],
                                       feed_dict={self.input: batch_x, self.output: batch_y})
                    avg_cost += cost / total_batch
                acc = sess.run(accuracy, feed_dict={self.input: data_t, self.output: labels_t})
                learning.append(acc)
                # saving the model
                if epoch % 10 == 0:
                    pass
                    # checkpoint_path = os.path.join(Params.checkpoint_path, 'model.ckpt')
                    # save_path = saver.save(sess, checkpoint_path)
                    # print("model saved to {}".format(checkpoint_path))
                # print(avg_cost, acc)
    plt.plot(learning)
    plt.title('Epoch vs Test accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Test accuracy')
    plt.show()
def data_gen(V, batch, nbatches):
    "Generate random data for a src-tgt copy task."
    for i in range(nbatches):
        data = torch.from_numpy(np.random.randint(1, V, size=(batch, 10)))
        data[:, 0] = 1
        src = Variable(data, requires_grad=False)
        tgt = Variable(data, requires_grad=False)
        yield Batch(src, tgt, 0)
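# Minimal consumption sketch for the copy task above (assumption: an
# Annotated-Transformer-style Batch exposing src, trg, src_mask, trg_mask,
# and a `model` with a matching forward signature):
#
#   for batch in data_gen(V=11, batch=30, nbatches=20):
#       out = model.forward(batch.src, batch.trg, batch.src_mask, batch.trg_mask)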
def execute_batch(self, max_nodes, ami, instance_type):
    batch_id = 'batch-%s' % uuid.uuid4()
    batch = Batch('received')
    batch.ami = ami
    batch.instance_type = instance_type
    batch.max_nodes = max_nodes
    self.client.set(batch_id, pickle.dumps(batch))
    self.client.publish('batches', batch_id)
    return batch_id
def execute_batch(self, max_nodes, ami, instance_type, email=''):
    batch_id = 'batch-%s' % str(uuid.uuid4())[31:36]
    batch = Batch('received')
    batch.ami = ami
    batch.instance_type = instance_type
    batch.max_nodes = max_nodes
    batch.email = email
    self.client.set(batch_id, pickle.dumps(batch))
    self.client.publish('batches', batch_id)
    return batch_id
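# Hedged sketch of the consuming side (assumption: `self.client` above is a
# Redis-style client, consistent with the pub/sub mock in the test further up,
# so a worker could pick up published batch ids like this):
#
#   pubsub = client.pubsub()
#   pubsub.subscribe('batches')
#   for message in pubsub.listen():
#       batch = pickle.loads(client.get(message['data']))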
def __init__(self, args):
    self._bidsDir = args.bids_dir
    self._dicomDir = args.dicom_dir
    self._session = Session(args.session, args.participant, self._bidsDir)
    self._parser = getattr(studyparser, args.algorithm)(self._dicomDir, self._session)
    self._yes = args.yes
    self._codeDir = os.path.join(self._bidsDir, 'code')
    utils.make_directory_tree(self._codeDir)
    self._batch = Batch(self._codeDir, self._session)
def regress(simu, policy, policy_type, nb_trajs, render=False) -> None:
    batch = Batch()
    simu.env.set_reward_flag(False)
    simu.env.set_duration_flag(False)
    if policy_type == "bernoulli" or policy_type == "discrete":
        batch = perform_expert_episodes_bangbang(simu, batch, nb_trajs, render)
    else:
        batch = perform_expert_episodes_continuous(simu, batch, nb_trajs, render)
    # print("size: ", batch.size())
    batch.train_policy_through_regress(policy)
def __init__(self, batch_size, output_file, schema, validator_map):
    write_function = csv.writer(output_file, delimiter=',', lineterminator='\n').writerows
    self.batch = Batch(batch_size, write_function)
    self.header = None
    self.header_map = {}
    self.header_written = False
    self.schema = schema
    self.validator_map = validator_map
def __init__(self, package, cli_composer, deploy_status, use_package_path=False):
    """
    Initialize an instance of Deployment
    """
    import uuid

    if use_package_path:
        self.batch = Batch(package.cwd)
    else:
        self.batch = Batch(tempfile.mkdtemp())
    self.cli_composer = cli_composer
    self.cwd = self.batch.cwd
    self.cwd_use_package_path = use_package_path
    self.deployed = False
    self.deployment_id = '{0}'.format(uuid.uuid1())
    self.deploy_status = deploy_status
    self.package = package
    self.started = False
def inferSingleImage(paraModel, paraFnImg):
    "recognize text in image provided by file path"
    img = cv2.imread(paraFnImg, cv2.IMREAD_GRAYSCALE)
    img = preprocess(img, config.IMAGE_WIDTH, config.IMAGE_HEIGHT, True, False, False)
    batch = Batch(None, [img])
    # (recognized, probability) = model.inferBatch(batch)
    (recognized, probability) = paraModel.inferBatch(batch, True)
    print('Recognized:', '"' + recognized[0] + '"')
    print('Probability:', probability[0])
def run(self):
    # update local policy vars on an interval for stability
    if self.learner_policy.get_step() % self.update_interval == 0:
        self.pull_vars()
        # print('learner at %s, actor at %s' % (
        #     self.learner_policy.get_step(),
        #     self.local_policy.get_step()))

    # FIXME: last obs might be from a different game;
    # since games are ~4.5k steps, not a big deal
    n_actions = self.env.action_space.n
    batch = Batch()
    state = self.last_obs  # first action in each new env is ~random
    lstm_state = self.local_policy.lstm_init_state
    done = step = 0
    while not done and step < self.steps:
        action, value, logit, lstm_state = self.local_policy.act(state, lstm_state)
        next_state, reward, done, _ = self.env.step(action)

        # skip the specified number of frames, aggregate rewards?
        # FIXME: don't just skip, stack the frames;
        # might mess things up if predicting above using non-diff and diff frames;
        # aggregate and non-aggregate must be constant.
        # note the env employs frame skipping already,
        # but more skipping seems to lead to a better policy
        # for _ in range(3):
        #     if done:
        #         break
        #     next_state, reward_s, done, _ = self.env.step(action)
        #     reward += reward_s

        # process observation data
        next_state = process_state(next_state)
        if type(action) == np.int64:
            action = to_onehot(action, n_actions)

        # add experience to batch
        batch.add((state, action, reward, value, done, next_state, logit, lstm_state))

        # update
        step += 1
        state = next_state
    self.last_obs = state
    return batch.get()
def make_test_batches(self, test_list):
    sorted_tests = []
    for e in test_list.tests_by_exe.values():
        for tst in e:
            sorted_tests.append(tst)
    # slowest tests first, then group tests by the same exe
    sorted_tests.sort(key=lambda tst: (-tst.duration, tst.exe_name))
    result = []
    batch = None
    for tst in sorted_tests:
        if not batch:
            batch = Batch(len(result))
            result.append(batch)
            should_add = True
        else:
            should_add = batch.total_duration() < self.wished_duration
        if not should_add:
            batch = Batch(len(result))
            result.append(batch)
        batch.add_test(tst)
    return result
def from_folders(folders):
    """
    Loads all the batches in given folders
    :param folders: A list of folders
    :return: A list of successfully loaded batches
    """
    batches = list()
    for filepath in parser.from_folders(folders):
        batch = Batch(filepath=filepath)
        if batch.load_status == OK:
            batches.append(batch)
    return batches
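# Usage sketch (assumption: `parser.from_folders` yields batch file paths and
# a failed load leaves `load_status` set to something other than OK):
#
#   batches = from_folders(["./results/run1", "./results/run2"])
#   print("loaded {} batches".format(len(batches)))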
def get_specific_example(hps, vocab, example_number):
    file_id, number = divmod(example_number, 1000)
    path = '/home/ubuntu/W266/final_0/W266_Final/data/final_chunked/validation_%03d.bin' % file_id
    print(f'Fetching example {number} from: {path}')
    filelist = glob.glob(path)
    inputs = []
    total_examples = 0
    total_batches = 0
    for f in filelist:
        with open(f, 'rb') as reader:
            while True:
                len_bytes = reader.read(8)
                if not len_bytes:
                    break
                str_len = struct.unpack('q', len_bytes)[0]
                example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
                e = example_pb2.Example.FromString(example_str)
                try:
                    article_text = e.features.feature['article'].bytes_list.value[0].decode()
                    if len(article_text) == 0:
                        # Skip examples with empty article text.
                        continue
                    abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode()
                    abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract_text)]
                    example = Example(article_text, abstract_sentences, vocab, hps)
                    inputs.append(example)
                    total_examples += 1
                except ValueError:
                    # Failed to get article or abstract from example; skip it.
                    continue
    batches = []
    tf.logging.info('Creating batches..')
    # Repeat the requested example beam_size times to form a single batch.
    example = inputs[number]
    b = [example for _ in range(hps.beam_size)]
    batches.append(Batch(b, hps, vocab))
    total_batches = 1
    total_examples = 1
    tf.logging.info('[TOTAL Batches] : %i', total_batches)
    tf.logging.info('[TOTAL Examples] : %i', total_examples)
    tf.logging.info('Creating batches..COMPLETE')
    return batches
def data_gen(V, batch, nbatches):
    """
    Generate random data for a src-tgt copy task.
    :param V: vocabulary size; token values are drawn from [1, V)
    :param batch: number of sequences per batch
    :param nbatches: number of batches to generate
    :return: yields Batch objects whose src and tgt are identical
    """
    for i in range(nbatches):
        data = torch.from_numpy(np.random.randint(1, V, size=(batch, 10)))
        data[:, 0] = 1
        src = Variable(data, requires_grad=False)
        tgt = Variable(data, requires_grad=False)
        yield Batch(src, tgt, 0)
def module_runner(module_name, serialize_queue, batch_file):
    module = fetch(module_name)
    if batch_file is None:
        gen = module.process()
    else:
        batch = Batch()
        batch.load(batch_file)
        # print(batch.items)
        gen = [transaction for item in batch.items for transaction in module.process(item)]
    i = 0
    for transaction in gen:
        serialize_queue.put(transaction)
        i += 1
def train(self, inputs, labels, epochs=1000, loss=MSE()):
    batcher = Batch()
    for epoch in range(epochs):
        epoch_cost = 0
        for input_set, label_set in batcher(inputs, labels):
            output = self.forward(input_set)
            # use the loss passed in instead of a hardcoded MSE()
            epoch_cost += loss.cost(output, label_set)
            gradient = loss.gradients(output, label_set)
            self.backward(gradient)
            for layer in self.layers:
                if issubclass(layer.__class__, LinearLayer):
                    self.sgd(layer)
        print("Epoch: {} - Cost: {}".format(epoch, epoch_cost))
def data_gen(num_words=11, batch_size=16, num_batches=100, length=10, pad_index=0, sos_index=1):
    """Simple data for copy task."""
    batches = []
    for i in range(num_batches):
        data = torch.from_numpy(
            np.random.randint(1, num_words, size=(batch_size, length)))
        data[:, 0] = sos_index
        data = data.cuda() if init.USE_CUDA else data
        src = data[:, 1:]
        trg = data
        src_lengths = [length - 1] * batch_size
        trg_lengths = [length] * batch_size
        batches.append(Batch((src, src_lengths), (trg, trg_lengths), pad_index=pad_index))
    return batches
def data_gen(self, batch_size=16, num_batches=100, eval=False):
    datasrc = self.train_src
    datatrg = self.train_trg
    if eval:
        datasrc = self.validate_src
        datatrg = self.validate_trg
    for i in range(num_batches):
        batchat = random.randrange(0, len(datasrc))
        sample = random.sample(
            range(max(0, batchat - batch_size),
                  min(batchat + batch_size, len(datasrc))),
            batch_size)
        datasrcbatch_temp = [datasrc[j] for j in sample]
        datatrgbatch_temp = [datatrg[j] for j in sample]
        datasrcbatch = []
        datatrgbatch = []
        # sort the batch by descending source length, keeping src/trg aligned
        for pair in reversed(sorted(enumerate(datasrcbatch_temp), key=lambda x: len(x[1]))):
            datasrcbatch.append(pair[1])
            datatrgbatch.append(datatrgbatch_temp[pair[0]])
        src_lengths = [len(j) for j in datasrcbatch]
        trg_lengths = [len(j) for j in datatrgbatch]
        # pad every sequence in the batch to the longest one with 0
        for each in datasrcbatch:
            while len(each) < max(src_lengths):
                each.append(0)
        for each in datatrgbatch:
            while len(each) < max(trg_lengths):
                each.append(0)
        datasrcbatch = torch.LongTensor(datasrcbatch)
        datatrgbatch = torch.LongTensor(datatrgbatch)
        datasrcbatch = datasrcbatch.cuda() if init.USE_CUDA else datasrcbatch
        datatrgbatch = datatrgbatch.cuda() if init.USE_CUDA else datatrgbatch
        yield Batch((datasrcbatch, src_lengths), (datatrgbatch, trg_lengths), pad_index=0)
def generic_collate(item_list, channel_order=CHANNEL_LAST):
    """
    Collate single-input, single-output Items into one Batch.
    ARGS:
        item_list: list of Items forming the batch.
        channel_order: expected channel layout (currently unused).
    """
    xs_list = [np.expand_dims(item.xs, 0) for item in item_list]
    ys_list = [np.expand_dims(item.ys, 0) for item in item_list]
    ids_list = [item.ids for item in item_list]
    extra_list = [item.extra for item in item_list]
    batch_xs = guess_collate(xs_list)(xs_list)
    batch_ys = guess_collate(ys_list)(ys_list)
    batch_ids = ids_list
    batch_extra = extra_list
    collated_batch = Batch(batch_xs, batch_ys, batch_ids, batch_extra)
    return collated_batch
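# Hedged example (assumption: an Item is any object exposing xs/ys/ids/extra
# attributes, e.g. a namedtuple), showing what generic_collate stacks:
#
#   from collections import namedtuple
#   Item = namedtuple("Item", ["xs", "ys", "ids", "extra"])
#   items = [Item(np.zeros((3, 32, 32)), np.array([1]), i, None) for i in range(4)]
#   batch = generic_collate(items)  # the collated xs gain a leading batch dimension of 4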
def __init__(self, env_name, num_threads, gamma=0.99, actor_learning_rate=0.001,
             actor_batch_size=64, critic_learning_rate=0.01, entropy_beta=0.01,
             critic_batch_size=16, critic_epochs=100, max_episodes_per_thread=100,
             episode_to_train=4):
    self.envs = [gym.make(env_name).env for _ in range(num_threads)]
    if self.envs[0].observation_space.shape == ():
        input_shape = 1
    else:
        input_shape = self.envs[0].observation_space.shape[0]
    self.actor = Actor(actor_learning_rate, actor_batch_size, input_shape,
                       self.envs[0].action_space.n, entropy_beta)
    self.critic = Critic(critic_learning_rate, critic_batch_size, critic_epochs, input_shape, 1)
    batch = Batch(self.actor, self.critic, batch_size=actor_batch_size)
    lock = Lock()
    self.threads = [
        Env_thread("thread" + str(i), lock, batch, self.envs[i], self.actor,
                   self.critic, gamma, max_episodes_per_thread, episode_to_train)
        for i in range(num_threads)
    ]