import math

import numpy as np
from numpy.random import permutation as perm


def ebHDD3(A, B):
    # Early-break Hausdorff distance with memoised pairwise distances.
    # minHausdorff(a, b) is assumed to be defined elsewhere in the module.
    dist_mat = np.full((len(A), len(B)), fill_value=np.nan)
    # directed hdd from A to B
    cmax = 0
    Ar = list(perm(A))
    Br = list(perm(B))
    for i in range(len(Ar)):
        cmin = math.inf
        for j in range(len(Br)):
            d = dist_mat[i][j] = minHausdorff(Ar[i], Br[j])
            if d < cmax:
                cmin = 0
                break
            cmin = min(cmin, d)
        cmax = max(cmax, cmin)
    # directed hdd from B to A: for each element of B, take the minimum
    # over A, reusing distances cached in dist_mat where available
    dmax = 0
    for j in range(len(Br)):
        dmin = math.inf
        for i in range(len(Ar)):
            if np.isnan(dist_mat[i][j]):
                d = dist_mat[i][j] = minHausdorff(Ar[i], Br[j])
            else:
                d = dist_mat[i][j]
            if d < dmax:
                dmin = 0
                break
            dmin = min(dmin, d)
        dmax = max(dmax, dmin)
    return max(dmax, cmax)
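# A minimal usage sketch for ebHDD3, assuming A and B are sequences of 2-D
# points and that minHausdorff reduces to a plain Euclidean distance between
# two points. Both assumptions are illustrative; the real minHausdorff is
# defined elsewhere in the original module.
import math


def minHausdorff(a, b):  # hypothetical stand-in for the real implementation
    return math.dist(a, b)


if __name__ == "__main__":
    A = [(0.0, 0.0), (1.0, 0.0), (0.0, 1.0)]
    B = [(0.0, 0.1), (1.1, 0.0), (0.2, 1.0)]
    print(ebHDD3(A, B))  # symmetric early-break Hausdorff distance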
def main():
    # Randomised solver: reads n images tagged H/V with their tags, pairs
    # vertical images at random, shuffles the slide order, and keeps the
    # best-scoring arrangement over 1000 random trials.
    import sys

    from numpy.random import permutation as perm
    from tqdm import tqdm

    sys.stdin = open("input.txt")
    rl = sys.stdin.readline
    # local aliases of builtins to shave attribute-lookup time in the hot loop
    int1 = int
    range1 = range
    len1 = len
    list1 = list
    set1 = set
    str1 = str
    n = int1(rl())
    himages = []
    vimages = []
    ha, va = himages.append, vimages.append
    for i in range1(n):
        line = rl().strip()
        o, ntags = line.split(' ')[:2]
        tags = line.split()[2:]
        if o == 'H':
            ha((tags, i + 1))
        else:
            va((tags, i + 1))
    ms = 0
    ma = []
    for i in tqdm(range1(1000)):
        hi = himages.copy()
        ha = hi.append
        vi = perm(vimages)
        # merge random pairs of vertical images into combined slides
        for j in range1(len1(vi) // 2):
            ha((list1(set1(list1(vi[2 * j][0]) + list1(vi[2 * j + 1][0]))),
                '%d %d' % (vi[2 * j][1], vi[2 * j + 1][1])))
        s = 0
        hi = perm(hi)
        a = [str1(hi[0][1])]
        for j in range1(len1(hi) - 1):
            com = 0
            for tag in hi[j][0]:
                if tag in hi[j + 1][0]:
                    com += 1
            # transition score: min(tags only in left, tags only in right, common)
            s += min(len(hi[j][0]) - com, len(hi[j + 1][0]) - com, com)
            a.append(str1(hi[j + 1][1]))
        if s > ms:
            ms = s
            ma = a
    with open('%d.txt' % ms, 'w') as f:
        f.write(str1(len(ma)) + '\n')
        f.write('\n'.join(ma))
import math

from numpy.random import permutation as perm


def ebHausdorff(A, B, metric):
    # Directed Hausdorff distance from A to B, randomised with an early
    # break: once the inner minimum drops below the running maximum, the
    # inner loop cannot change the result and is abandoned.
    cmax = 0
    Ar = list(perm(A))
    Br = list(perm(B))
    for x in Ar:
        cmin = math.inf
        for y in Br:
            d = metric(x, y)
            if d < cmax:
                cmin = 0
                break
            cmin = min(cmin, d)
        cmax = max(cmax, cmin)
    return cmax
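# A short, self-contained check of ebHausdorff on Euclidean points; the point
# sets and the brute-force reference below are purely illustrative.
import math


def _euclidean(x, y):
    return math.dist(x, y)


if __name__ == "__main__":
    set_a = [(0.0, 0.0), (2.0, 0.0)]
    set_b = [(0.0, 1.0), (2.0, 1.0), (5.0, 1.0)]
    d_fast = ebHausdorff(set_a, set_b, _euclidean)
    # brute-force directed Hausdorff distance for comparison
    d_ref = max(min(math.dist(x, y) for y in set_b) for x in set_a)
    assert math.isclose(d_fast, d_ref)
    print(d_fast)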
def divideData(self, filename, num=5, mph=3, delet=True):
    print("Estimating heritability using " + str(num) + " components")
    direct = "TEMP"
    sFil = Bed(filename)
    yFil = Pheno(filename + ".fam")
    n = sFil.iid_count
    reOrd = perm(n)
    yFil = yFil[reOrd, :]
    sFil = sFil[reOrd, :]
    y = yFil.read().val[:, 3]
    div = [int(math.ceil(i * n / float(num))) for i in range(0, num + 1)]
    varEsts = []
    for i in range(0, num):
        print("For component " + str(i))
        sFilTemp = self.BED[div[i]:div[i + 1], :]
        Xtemp = sFilTemp.read().standardize().val
        ytemp = y[div[i]:div[i + 1]]
        varEsts.append(self.VarCalc.RealVar(ytemp, Xtemp))
    return varEsts
def cifar_generator():
    """
    X_data : (data_size, 32, 32, 3)
    y_label : (data_size, 100)
    batch_size : batch size
    """
    (X_train, y_train), (X_test, y_test) = c100.load_data()  # Load Data
    y_train, y_test = prepare_output_data(y_train, y_test)  # prepare y_label
    data_size = len(X_train)
    batch_iter_per_epoch = int(data_size / batch_size)
    while True:
        shuffle_idx = perm(np.arange(len(X_train)))
        print('\n')
        print("*" * 30)
        print("Data Size : {}".format(data_size))
        print("Batch Size : {}".format(batch_size))
        print("Batch iterations per Epoch : {}".format(batch_iter_per_epoch))
        print("*" * 30)
        print(shuffle_idx[0:10])
        for b in range(batch_iter_per_epoch):
            batch_features = np.zeros((batch_size, inp_w, inp_h, inp_c))
            batch_labels = np.zeros((batch_size, 100))
            for b_i, i in enumerate(
                    range(b * batch_size, b * batch_size + batch_size)):
                # choose random index in features
                batch_features[b_i] = preprocess(X_train[shuffle_idx[i]],
                                                 color_type='RGB')
                batch_labels[b_i] = y_train[shuffle_idx[i]]
            yield batch_features, batch_labels
        print("Done Generator")
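# A sketch of how cifar_generator might be wired up, assuming a TensorFlow/
# Keras environment. c100, prepare_output_data, preprocess and the batch/size
# globals are assumptions about the surrounding module, stubbed here only so
# the generator can be exercised end to end.
import numpy as np
from numpy.random import permutation as perm
from tensorflow.keras.datasets import cifar100 as c100
from tensorflow.keras.utils import to_categorical

batch_size, inp_w, inp_h, inp_c = 32, 32, 32, 3


def prepare_output_data(y_train, y_test):
    # hypothetical helper: one-hot encode the 100 CIFAR-100 classes
    return to_categorical(y_train, 100), to_categorical(y_test, 100)


def preprocess(img, color_type='RGB'):
    # hypothetical helper: scale pixel values to [0, 1]
    return img.astype(np.float32) / 255.0


gen = cifar_generator()
features, labels = next(gen)
print(features.shape, labels.shape)  # (32, 32, 32, 3) (32, 100)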
def shuffle(self):
    batch = self.FLAGS.batch
    data = self.parse()
    size = len(data)
    print('Dataset of {} instance(s)'.format(size))
    if batch > size:
        self.FLAGS.batch = batch = size
    batch_per_epoch = int(size / batch)
    for i in range(self.FLAGS.epoch):
        shuffle_idx = perm(np.arange(size))
        for b in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j in range(b * batch, b * batch + batch):
                train_instance = data[shuffle_idx[j]]
                inp, new_feed = self._batch(train_instance)
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        print('Finish {} epoch(es)'.format(i + 1))
def divideData(filename, direct, num=5, mph=3, delet=True):
    print("Estimating heritability using " + str(num) + " components")
    [yFil, sFil] = getData(filename, mph=mph)
    n = sFil.iid_count
    reOrd = perm(n)
    yFil = yFil[reOrd, :]
    sFil = sFil[reOrd, :]
    div = [int(math.ceil(i * n / float(num))) for i in range(0, num + 1)]
    varEsts = []
    for i in range(0, num):
        print("For component " + str(i))
        sFilTemp = sFil[div[i]:div[i + 1], :]
        yFilTemp = yFil[div[i]:div[i + 1], :]
        fileTemp = direct + "/tempFile_" + str(i)
        Bed.write(fileTemp, sFilTemp.read())
        Pheno.write(fileTemp + ".phen", yFilTemp.read())
        varEsts.append(varRes(fileTemp, direct))
        if delet:
            os.system("rm " + direct + "/tempFile_" + str(i) + "*")
    return varEsts
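# A quick illustration of the boundary computation used above: div holds
# num + 1 cut points that split n individuals into num near-equal components.
import math

n, num = 10, 3
div = [int(math.ceil(i * n / float(num))) for i in range(0, num + 1)]
print(div)  # [0, 4, 7, 10] -> slices [0:4], [4:7], [7:10]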
def shuffle():
    batch_size = cfg.batch_size
    data = parse()
    size = len(data)
    print('Dataset of {} instance(s)'.format(size))
    if batch_size > size:  # in case the whole dataset is smaller than the batch size
        cfg.batch_size = batch_size = size
    batch_per_epoch = int(size / batch_size)
    for i in range(cfg.epochs):
        shuffle_idx = perm(np.arange(size))
        # print("shuffle index : ", shuffle_idx)
        for b in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j in range(b * batch_size, b * batch_size + batch_size):
                train_instance = data[shuffle_idx[j]]
                inp, new_feed = _batch(train_instance)
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]  # inp.shape: (448, 448, 3)
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            # print("feed_batch : ", len(feed_batch), feed_batch['botright'].shape)
            # feed_batch : 7 (32, 49, 2, 2)
            # print("x_batch[0].shape : ", x_batch[0].shape)
            # x_batch.shape : (1, 448, 448, 3)
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        print('Finish {} epoch'.format(i + 1))
def split_CV(root_folder_path, train_rates=0.8):
    cats_folder_path = os.path.join(root_folder_path, "cats")
    dogs_folder_path = os.path.join(root_folder_path, "dogs")
    # cats folder -> train/cats, validation/cats
    files = [f for f in os.listdir(cats_folder_path)]
    files_size = len(files)
    print("training cats size : ", files_size)
    shuffle_idx = perm(np.arange(files_size))
    trainval_size = int(files_size * train_rates)
    train_idx = shuffle_idx[:trainval_size]
    validation_idx = shuffle_idx[trainval_size:]
    for i in train_idx:
        print("train cat : ", files[i])
        copyfile(os.path.join(cats_folder_path, files[i]),
                 os.path.join(root_folder_path, "train", "cats", files[i]))
    for i in validation_idx:
        print("validation cat : ", files[i])
        copyfile(
            os.path.join(cats_folder_path, files[i]),
            os.path.join(root_folder_path, "validation", "cats", files[i]))
    # dogs folder -> train/dogs, validation/dogs
    files = [f for f in os.listdir(dogs_folder_path)]
    files_size = len(files)
    print("training dogs size : ", files_size)
    shuffle_idx = perm(np.arange(files_size))
    trainval_size = int(files_size * train_rates)
    train_idx = shuffle_idx[:trainval_size]
    validation_idx = shuffle_idx[trainval_size:]
    for i in train_idx:
        print("train dog : ", files[i])
        copyfile(os.path.join(dogs_folder_path, files[i]),
                 os.path.join(root_folder_path, "train", "dogs", files[i]))
    for i in validation_idx:
        print("validation dog : ", files[i])
        copyfile(
            os.path.join(dogs_folder_path, files[i]),
            os.path.join(root_folder_path, "validation", "dogs", files[i]))
def _split(folder_path, train_rates=0.8):
    from numpy.random import permutation as perm
    import numpy as np
    files = [
        f for f in os.listdir(folder_path)
        if f.split('.')[-1] == 'xml' or f.split('.')[-1] == 'png'
    ]
    files_size = len(files)
    shuffle_idx = perm(np.arange(files_size))
    trainval_size = int(files_size * train_rates)
    return files, shuffle_idx[:trainval_size], shuffle_idx[trainval_size:]
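# A short usage sketch for _split; "data/annotations" is a hypothetical folder
# of xml/png files, and os is assumed to be imported at module level since
# _split relies on it.
import os

files, train_idx, val_idx = _split("data/annotations", train_rates=0.8)
train_files = [files[i] for i in train_idx]
val_files = [files[i] for i in val_idx]
print(len(train_files), "train /", len(val_files), "validation")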
def shuffle(self): """ Call the specific framework to parse annotations, then use the parsed object to yield minibatches. minibatches should be preprocessed before yielding to be appropriate placeholders for model's loss evaluation. """ data = self.framework.parse(self.FLAGS, self.meta) size = len(data) batch = self.FLAGS.batch print 'Dataset of {} instance(s)'.format(size) if batch > size: self.FLAGS.batch = batch = size batch_per_epoch = int(size / batch) total = self.FLAGS.epoch * batch_per_epoch yield total for i in range(self.FLAGS.epoch): print 'EPOCH {}'.format(i + 1) shuffle_idx = perm(np.arange(size)) for b in range(batch_per_epoch): end_idx = (b + 1) * batch start_idx = b * batch offbound = False # two yieldee x_batch = list() feed_batch = dict() for j in range(start_idx, end_idx): real_idx = shuffle_idx[j] this = data[real_idx] inp, feedval = self.framework.batch(self.FLAGS, self.meta, this) if inp is None: offbound = True break x_batch += [inp] for k in feedval: if k not in feed_batch: feed_batch[k] = [feedval[k]] continue feed_batch[k] = np.concatenate( [feed_batch[k], [feedval[k]]]) if offbound: print off_bound_msg continue x_batch = np.concatenate(x_batch, 0) yield (x_batch, feed_batch)
def shuffle(self):
    batch = self.FLAGS.batch
    data = self.parse()
    size = len(data)
    print('Dataset of {} instance(s)'.format(size))
    if batch > size:
        self.FLAGS.batch = batch = size
    batch_per_epoch = int(size / batch)
    for i in range(self.FLAGS.epoch):
        shuffle_idx = perm(np.arange(size))
        for b_ in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j_ in range(b_ * batch, b_ * batch + batch):
                train_instance = data[shuffle_idx[j_]]
                try:
                    inp, new_feed = self._batch(train_instance)
                except ZeroDivisionError:
                    print("This image's width or height are zeros: ",
                          train_instance[0])
                    print('train_instance:', train_instance)
                    print('Please remove or fix it then try again.')
                    raise
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        print('Finish {} epoch(es)'.format(i + 1))
def shuffle(self):
    batch = self.flags.batch
    data = self.parse()
    self.flags.size = len(data)
    self.io_flags()
    self.logger.info('Dataset of {} instance(s)'.format(self.flags.size))
    if batch > self.flags.size:
        self.flags.batch = batch = self.flags.size
    batch_per_epoch = int(self.flags.size / batch)
    for i in range(self.flags.epoch):
        shuffle_idx = perm(np.arange(self.flags.size))
        for b in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j in range(b * batch, b * batch + batch):
                train_instance = data[shuffle_idx[j]]
                self.logger.debug(train_instance[0])
                try:
                    inp, new_feed = self._batch(train_instance)
                except ZeroDivisionError:
                    self.logger.error(
                        "This image's width or height are zeros: %s",
                        train_instance[0])
                    self.logger.error('train_instance: %s', train_instance)
                    self.logger.error(
                        'Please remove or fix it then try again.')
                    raise
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        self.logger.info('Finish {} epoch(es)'.format(i + 1))
def shuffle(self): """ Call the specific framework to parse annotations, then use the parsed object to yield minibatches. minibatches should be preprocessed before yielding to be appropriate placeholders for model's loss evaluation. """ data = self.framework.parse() size = len(data) batch = self.FLAGS.batch self.say('Dataset of {} instance(s)'.format(size)) if batch > size: self.FLAGS.batch = batch = size batch_per_epoch = int(size / batch) total = self.FLAGS.epoch * batch_per_epoch yield total for i in range(self.FLAGS.epoch): self.say('EPOCH {}'.format(i + 1)) shuffle_idx = perm(np.arange(size)) for b in range(batch_per_epoch): end_idx = (b + 1) * batch start_idx = b * batch # two yieldee x_batch = list() feed_batch = dict() for j in range(start_idx, end_idx): real_idx = shuffle_idx[j] this = data[real_idx] inp, feedval = self.framework.batch(this) if inp is None: continue x_batch += [np.expand_dims(inp, 0)] for key in feedval: if key not in feed_batch: feed_batch[key] = [feedval[key]] continue feed_batch[key] = np.concatenate( [feed_batch[key], [feedval[key]]]) x_batch = np.concatenate(x_batch, 0) yield (x_batch, feed_batch)
def shuffle(self):
    batch = self.FLAGS.batch  # number of instances per batch
    data = self.parse()  # parse every xml file in the annotation folder;
    # data holds entries of the form
    # [image name, [image w, image h, [object name, xmin, ymin, xmax, ymax]]]
    size = len(data)  # total number of training instances
    print('Dataset of {} instance(s)'.format(size))
    if batch > size:
        self.FLAGS.batch = batch = size
    batch_per_epoch = int(size / batch)  # batches per pass over the data
    for i in range(self.FLAGS.epoch):  # number of passes over the data
        shuffle_idx = perm(np.arange(size))  # shuffle the data order
        for b in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j in range(b * batch, b * batch + batch):  # instances of this batch
                train_instance = data[shuffle_idx[j]]
                try:
                    # inp: current image data (h, w, c);
                    # new_feed: TF feed values built from the annotation
                    inp, new_feed = self._batch(train_instance)
                except ZeroDivisionError:
                    print("This image's width or height are zeros: ",
                          train_instance[0])
                    print('train_instance:', train_instance)
                    print('Please remove or fix it then try again.')
                    raise
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]  # x_batch: [?, h, w, c]
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    # concatenate this batch's feed values
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)  # (?, h, w, c)
            yield x_batch, feed_batch
        print('Finish {} epoch(es)'.format(i + 1))
def shuffle(annotation_path, img_path, labels, batch_size, epoch):
    data = parse(annotation_path, labels)
    size = len(data)
    if batch_size > size:
        batch_size = size
    batch_per_epoch = int(size / batch_size)
    for i in range(epoch):
        shuffle_idx = perm(np.arange(size))
        for b in range(batch_per_epoch):
            x_batch = []
            feed_batch = {}
            for j in range(b * batch_size, b * batch_size + batch_size):
                train_instance = data[shuffle_idx[j]]
                try:
                    inp, new_feed = batch(train_instance, img_path)
                except ZeroDivisionError:
                    print("This image's width or height are zeros: ",
                          train_instance[0])
                    print('train_instance: ', train_instance)
                    print('Please remove or fix it then try again.')
                    raise
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]
                for key in new_feed:
                    new = new_feed[key]
                    # dict.get returns the stored value when the key exists,
                    # otherwise the default
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        print('Finish {} epoch(es)'.format(i + 1))
def shuffle(self):
    batch = self.FLAGS.batch
    data = self.parse()
    size = len(data)
    print('Dataset of {} instance(s)'.format(size))
    if batch > size:
        self.FLAGS.batch = batch = size
    batch_per_epoch = int(size / batch)
    for i in range(self.FLAGS.epoch):
        shuffle_idx = perm(np.arange(size))
        for b in range(batch_per_epoch):
            # yield these
            x_batch = list()
            feed_batch = dict()
            for j in range(b * batch, b * batch + batch):
                train_instance = data[shuffle_idx[j]]
                try:
                    inp, new_feed = self._batch(train_instance)
                except ZeroDivisionError:
                    print("This image's width or height are zeros: ",
                          train_instance[0])
                    print('train_instance:', train_instance)
                    print('Please remove or fix it then try again.')
                    raise
                if inp is None:
                    continue
                x_batch += [np.expand_dims(inp, 0)]
                for key in new_feed:
                    new = new_feed[key]
                    old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
                    feed_batch[key] = np.concatenate([old_feed, [new]])
            x_batch = np.concatenate(x_batch, 0)
            yield x_batch, feed_batch
        print('Finish {} epoch(es)'.format(i + 1))
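# A self-contained toy run of the batching pattern shared by the shuffle
# variants above: per-instance feed tensors are stacked with the
# np.zeros((0,) + shape) seed trick so every key grows along axis 0.
# The data and _toy_batch below are illustrative stand-ins, not the real
# parser or preprocessor.
import numpy as np
from numpy.random import permutation as perm


def _toy_batch(instance):
    # hypothetical per-instance preprocessing: an "image" and two feed tensors
    inp = np.full((4, 4, 3), instance, dtype=np.float32)
    return inp, {'probs': np.zeros((7, 2)), 'confs': np.ones((7,))}


data, batch = list(range(10)), 5
shuffle_idx = perm(np.arange(len(data)))
for b in range(len(data) // batch):
    x_batch, feed_batch = [], {}
    for j in range(b * batch, b * batch + batch):
        inp, new_feed = _toy_batch(data[shuffle_idx[j]])
        x_batch += [np.expand_dims(inp, 0)]
        for key in new_feed:
            new = new_feed[key]
            old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
            feed_batch[key] = np.concatenate([old_feed, [new]])
    x_batch = np.concatenate(x_batch, 0)
    print(x_batch.shape, feed_batch['probs'].shape)  # (5, 4, 4, 3) (5, 7, 2)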
def test_shuffle():
    batch_size = cfg.batch_size
    test_data = pascal_voc_clean_xml(cfg.test_ann_location, cfg.classes_name,
                                     False)
    test_size = len(test_data)
    print("Test Dataset of {} instance(s)".format(test_size))
    shuffle_idx = perm(np.arange(test_size))
    print("Test shuffle index : ", shuffle_idx[0:10])
    x_batch = list()
    feed_batch = dict()
    for j in range(batch_size):
        test_instance = test_data[shuffle_idx[j]]
        inp, new_feed = _batch(test_instance, is_test=True)
        if inp is None:
            continue
        x_batch += [np.expand_dims(inp, 0)]
        for key in new_feed:
            new = new_feed[key]
            old_feed = feed_batch.get(key, np.zeros((0,) + new.shape))
            feed_batch[key] = np.concatenate([old_feed, [new]])
    x_batch = np.concatenate(x_batch, 0)
    return x_batch, feed_batch
def random_cluster(self, L):
    from numpy.random import permutation as perm
    from numpy.random import choice
    # draw L - num_clust labels at random, append one of each cluster label
    # so every cluster appears at least once, then shuffle
    labels = list(choice(range(self.num_clust), size=L - self.num_clust)) \
        + list(range(self.num_clust))
    return [list(perm(labels))]
def shuffle(image_dir, annotation_dir, image_ids, total_epoch=1):
    # get batches
    batch_size = cfg.batch_size
    # batch_per_epoch = int(len(image_ids) / batch_size)
    for i in range(total_epoch):
        shuffle_idx = perm(np.arange(len(image_ids)))
        img_batch = None
        coord_batch = None
        k = 0
        for j in range(len(image_ids)):
            try:
                xywhc = None
                train_instance = image_ids[shuffle_idx[j]]
                # annotation_dir_deep is assumed to be a module-level flag
                if annotation_dir_deep:
                    image_id = train_instance[0].split('.xml')[0]
                    xywhc = convert_annotation(annotation_dir, image_id,
                                               train_instance[1])
                    coord = np.reshape(xywhc, [30, 5])
                    # print('imageID:{}, xywhc: {}'.format(image_id, xywhc))
                    image_data = convert_img(image_dir, image_id,
                                             train_instance[1])
                else:
                    image_id = train_instance.split('.xml')[0]
                    xywhc = convert_annotation(annotation_dir, image_id, None)
                    coord = np.reshape(xywhc, [30, 5])
                    # print('imageID:{}, xywhc: {}'.format(image_id, xywhc))
                    image_data = convert_img(image_dir, image_id, None)
                if not xywhc:
                    continue
                img = np.reshape(image_data,
                                 [cfg.sample_size, cfg.sample_size, 3])

                # data augmentation (disabled)
                # rnd = tf.less(tf.random_uniform(shape=[], minval=0, maxval=2), 1)
                #
                # # rnd is part of data augmentation
                # def flip_img_coord(_img, _coord):
                #     zeros = tf.constant([[0, 0, 0, 0, 0]] * 30, tf.float32)
                #     img_flipped = tf.image.flip_left_right(_img)
                #     idx_invalid = tf.reduce_all(tf.equal(coord, 0), axis=-1)
                #     coord_temp = tf.concat(
                #         [tf.minimum(tf.maximum(1 - _coord[:, :1], 0), 1),
                #          _coord[:, 1:]], axis=-1)
                #     coord_flipped = tf.where(idx_invalid, zeros, coord_temp)
                #     return img_flipped, coord_flipped
                #
                # img, coord = tf.cond(
                #     rnd, lambda: (tf.identity(img), tf.identity(coord)),
                #     lambda: flip_img_coord(img, coord))

                if coord is None:
                    continue
                if img_batch is None:
                    img_batch = np.expand_dims(img, 0)
                else:
                    img_batch = np.concatenate(
                        [img_batch, np.expand_dims(img, 0)], 0)
                if coord_batch is None:
                    coord_batch = np.expand_dims(coord, 0)
                else:
                    coord_batch = np.concatenate(
                        [coord_batch, np.expand_dims(coord, 0)], 0)
                k += 1
            except Exception as e:
                pass
                # print('shuffle error', e)
                # print('image:{} has illegal shape'.format(image_id))
            if k == batch_size:
                yield img_batch, coord_batch
                k = 0
                img_batch = None
                coord_batch = None
from time import time

from numpy.random import permutation as perm


def qsort(arr, l, r):
    # in-place quicksort (Hoare partition scheme)
    i, j = l, r
    pivot = arr[(l + r) // 2]
    while i <= j:
        while arr[i] < pivot:
            i += 1
        while arr[j] > pivot:
            j -= 1
        if i <= j:
            arr[i], arr[j] = arr[j], arr[i]
            i += 1
            j -= 1
    if l < j:
        qsort(arr, l, j)
    if r > i:
        qsort(arr, i, r)


myarr = perm(array_size)  # array_size is assumed to be defined above

arr = myarr.copy()  # copy so in-place sorting doesn't affect later runs
b_start = time()
bubble = bombelki(arr)
b_time = time() - b_start
print("bubble sort:\t", b_time)
# print(rand_arr)

arr = myarr.copy()
s_start = time()
selection = select(arr)
s_time = time() - s_start
print("selection sort:\t", s_time)
# print(selection_sort(rand_arr))

arr = myarr.copy()
# print(arr)
q_start = time()
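# The benchmark above is cut off just before the quicksort run. A plausible,
# self-contained completion under the same copy-then-time pattern; array_size
# here is an assumed value, not the original script's.
from time import time

from numpy.random import permutation as perm

array_size = 2000
myarr = perm(array_size)

arr = myarr.copy()
q_start = time()
qsort(arr, 0, len(arr) - 1)
q_time = time() - q_start
print("quicksort:\t", q_time)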