def attach_to_max(contents):
    """
    Defines commands to send to Max, establishes a connection to its commandPort,
    then sends the code to inject debugpy
    """

    global run_code
    config = contents['arguments']

    # Format the simulated attach response to send it back to the debugger
    # while we set up the debugpy in the background
    attach_code = ATTACH_TEMPLATE.format(
        debugpy_path=debugpy_path,
        hostname=config['debugpy']['host'],
        port=int(config['debugpy']['port'])
    )

    # Format RUN_TEMPLATE to point to the temporary
    # file containing the code to run
    run_code = RUN_TEMPLATE.format(
        dir=dirname(config['program']),
        file_name=split(config['program'])[1][:-3] or basename(split(config['program'])[0])[:-3]
    )

    # Then send the attach code
    log('Sending attach code to Max')
    send_py_code_to_max(attach_code)
    log('Successfully attached to Max')

    # Then start the Max debugging threads
    run(start_debugging, ((config['debugpy']['host'], int(config['debugpy']['port'])),))
def multiply_karatsuba(x, y):
    """
    Multiplies two numbers represented as arrays using the Karatsuba algorithm,
    falling back on the grade school algorithm for the base case
    :param x: []int
    :param y: []int
    :rtype []int
    """
    x, y = match_padding(x, y)
    a, b = split(x)
    c, d = split(y)

    if len(x) == 1:
        return multiply_simple(x, y)

    res_1 = multiply_karatsuba(a, c)
    res_2 = multiply_karatsuba(b, d)
    partial = multiply_karatsuba(add(a, b), add(c, d))

    # res_3 is partial - res_1 - res_2.
    # To simplify, just add res_1 and res_2 then subtract that sum from partial
    res_3 = subtract(partial, add(res_1, res_2))

    res = add(pad(res_1, len(x), 'right'),
              res_2,
              pad(res_3, (len(x) + 1) // 2, 'right'))
    return res
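# Illustration only: a minimal sketch of two of the helpers multiply_karatsuba
# relies on, assuming digits are stored most-significant first. The real
# helpers (match_padding, add, subtract, multiply_simple) are not shown in the
# snippet above and may differ.
def split(x):
    # high half gets len(x) // 2 digits, low half gets the remaining ceil(n/2)
    mid = len(x) // 2
    return x[:mid], x[mid:]


def pad(x, amount, side):
    # append ('right') or prepend ('left') `amount` zero digits
    zeros = [0] * amount
    return x + zeros if side == 'right' else zeros + x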
def main():
    import codecs
    import sys
    import itertools
    import math

    args = parse_args()

    ilines = [util.preprocess(x, args.lang) for x in codecs.open(args.input, 'r', 'utf-8').readlines()]
    rlines = [util.preprocess(x, args.lang) for x in codecs.open(args.ref, 'r', 'utf-8').readlines()]

    if len(ilines) != len(rlines):
        print("Error: input file has {0} lines, but reference has {1} lines.".format(len(ilines), len(rlines)))
        sys.exit(1)

    scores = []
    falign = open(args.align, 'w') if args.align is not None else None

    for lineno, (rline, iline) in enumerate(itertools.izip(ilines, rlines), start=1):
        if args.force_token_mode:
            rline, iline = rline.split(), iline.split()
        else:
            rline, iline = util.split(rline, args.lang), util.split(iline, args.lang)

        # iline and rline are now lists of tokens
        score, alignment = ter(iline, rline, align=True)
        if args.align is not None:
            falign.write('%s\n' % ' '.join(alignment))
        scores.append(score)
        if args.verbose:
            print("Sentence {0}: {1:.4f}".format(lineno, score))

    if args.align is not None:
        falign.close()

    average = sum(scores) / len(scores)
    variance = sum((x - average) ** 2 for x in scores) / len(scores)
    stddev = math.sqrt(variance)
    print("Average={0:.4f}, Variance={1:.4f}, Standard Deviation={2:.4f}".format(average, variance, stddev))
def multiply_karatsuba_parallel(x, y, key=None):
    """
    Multiplies two numbers represented as arrays using the Karatsuba algorithm,
    falling back on the grade school algorithm for the base case
    :param x: []int
    :param y: []int
    :rtype []int
    """
    x, y = match_padding(x, y)
    a, b = split(x)
    c, d = split(y)

    # for the base case, go simple
    if len(x) == 1:
        return multiply_simple(x, y)

    # for big numbers, go parallel
    if len(x) > 300:
        # generate random ids for the subprocess outputs
        r1 = random.random()
        r2 = random.random()
        r3 = random.random()

        # run the sub-multiplications in parallel
        p1 = Process(target=multiply_karatsuba_parallel, args=[a, c, r1])
        p2 = Process(target=multiply_karatsuba_parallel, args=[b, d, r2])
        p3 = Process(target=multiply_karatsuba_parallel, args=[add(a, b), add(c, d), r3])
        p1.start()
        p2.start()
        p3.start()
        p1.join()
        p2.join()
        p3.join()

        # get the results
        res_1 = return_dict[r1]
        res_2 = return_dict[r2]
        partial = return_dict[r3]

    # for smaller numbers, don't bother parallelizing
    else:
        res_1 = multiply_karatsuba_parallel(a, c)
        res_2 = multiply_karatsuba_parallel(b, d)
        partial = multiply_karatsuba_parallel(add(a, b), add(c, d))

    # do the karatsuba shuffle
    res_3 = subtract(partial, add(res_1, res_2))
    res = add(pad(res_1, len(x), 'right'),
              res_2,
              pad(res_3, (len(x) + 1) // 2, 'right'))

    # if we are in parallel mode, write the result to the global dict
    if key is not None:
        return_dict[key] = res

    return res
def main(): """ main function to prepare data for Tiramisu algorithm """ parser = argparse.ArgumentParser( description='reads image sets and augments the data for Tiramisu', prog='data_gen.py <args>') # Required arguments parser.add_argument("-i", "--input", required=True, help="Path to image sets") parser.add_argument("-o", "--output", required=True, help="Path to save test and train files") # Optional arguments parser.add_argument("-r", "--ratio", type=float, default=0.2, help="validation set ratio") # Creating required directories args = vars(parser.parse_args()) if not os.path.exists(args['output'] + '/train/data/'): os.makedirs(args['output'] + '/train/data/') if not os.path.exists(args['output'] + '/validate/data/'): os.makedirs(args['output'] + '/validate/data/') if not os.path.exists(args['output'] + '/train/masks/'): os.makedirs(args['output'] + '/train/masks/') if not os.path.exists(args['output'] + '/validate/masks/'): os.makedirs(args['output'] + '/validate/masks/') if not os.path.exists(args['output'] + '/test/data/'): os.makedirs(args['output'] + '/test/data/') print("Creating an image per video...") combine(args['input'], args['output']) print("Generating a mask per video...") json_to_mask(args['input'], args['output']) print("augmenting the dataset...") slicer(args['output']) rotate(args['output']) transpose(args['output']) # Splitting the dataset into training and validation set split(args['output'], args['ratio'])
def generate(self, choices, offset):
    choices = choices.detach()
    b, n, s = choices.size()

    probs = offset[:, None, :].expand(b, n, s).clone()
    sel = 1.0 - probs.clone()[~ choices]
    probs[~ choices] = sel
    # probs[~ choices] = 1.0 - probs[~ choices].clone()

    # probs now contains the probability (under offset) of the choices made
    probs = probs.prod(dim=2, keepdim=True).expand(b, n, s).contiguous()

    # Generate indices from the chosen offset
    indices = util.split(choices, self.depth)

    if n > 1:
        dups = self.duplicates(indices)
        probs = probs.clone()
        probs[dups] = 0.0
        probs = probs / probs.sum(dim=1, keepdim=True)

    return indices, probs
def show_exits(self):
    exits = self.v_RS[self.v_RM]
    chars = util.split(exits)
    # print("> you can go : %s " % chars)

    ex = "+-----+\n"
    if "N" in chars:
        ex += "| N |\n"
    else:
        ex += "| |\n"
    if "W" in chars:
        ex += "| W"
    else:
        ex += "| "
    if "E" in chars:
        ex += " E |\n"
    else:
        ex += " |\n"
    if "S" in chars:
        ex += "| S |\n"
    else:
        ex += "| |\n"
    ex += "+-----+\n"

    print("> you can go :")
    print(ex)
def sobel_filter(image):
    kernelx = np.array(([-1, 0, 1], [-2, 0, 2], [-1, 0, 1]))
    kernely = np.array(([-1, -2, -1], [0, 0, 0], [1, 2, 1]))

    if len(image.shape) == 2:
        gx = conv(image, kernelx)
        gy = conv(image, kernely)
        output = abs(gx) + abs(gy)  # np.sqrt(gx ** 2 + gy ** 2) slower
        output[np.where(output > MAX_PIXEL)] = MAX_PIXEL
        return output.astype(np.uint8)
    else:
        r, g, b = util.split(image)
        rx, ry = conv(r, kernelx), conv(r, kernely)
        gx, gy = conv(g, kernelx), conv(g, kernely)
        bx, by = conv(b, kernelx), conv(b, kernely)
        R = abs(rx) + abs(ry)
        G = abs(gx) + abs(gy)
        B = abs(bx) + abs(by)
        output = util.merge(R, G, B)
        output[np.where(output > MAX_PIXEL)] = MAX_PIXEL
        return output.astype(np.uint8)
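# Illustration only: a plausible sketch of the util.split / util.merge pair used
# by the RGB branches of the filters in this collection, assuming channel-last
# numpy images. The names split_channels/merge_channels are hypothetical; the
# real util module is not shown here and may differ.
import numpy as np


def split_channels(image):
    # return the three colour planes of an (H, W, 3) array
    return image[..., 0], image[..., 1], image[..., 2]


def merge_channels(r, g, b):
    # stack three (H, W) planes back into an (H, W, 3) array
    return np.stack((r, g, b), axis=-1)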
def show_exits(self, playerLocation):
    exits = self.v_RS[playerLocation]
    chars = util.split(exits)
    # print("> you can go : %s " % chars)

    ex = "\t\t+-----+\n"
    if "N" in chars:
        ex += "\t\t| N |\n"
    else:
        ex += "\t\t| |\n"
    if "W" in chars:
        ex += "\t\t| W"
    else:
        ex += "\t\t| "
    if "E" in chars:
        ex += " E |\n"
    else:
        ex += " |\n"
    if "S" in chars:
        ex += "\t\t| S |\n"
    else:
        ex += "\t\t| |\n"
    ex += "\t\t+-----+\n"

    print("> you can go :")
    print(ex)
def mkstemp(self, suffix="", prefix="tmp", dir=None, text=False): fd, name = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=self.join(dir), text=text) dname, fname = util.split(name) if dir: return fd, os.path.join(dir, fname) else: return fd, fname
def harmonic_mean_filter_rgb(image, filter_size):
    r, g, b = util.split(image)
    R = harmonic_mean_filter(r, filter_size)
    G = harmonic_mean_filter(g, filter_size)
    B = harmonic_mean_filter(b, filter_size)
    output = util.merge(R, G, B)
    return output.astype(np.uint8)
def median_filter_rgb(image, filter_size):
    r, g, b = util.split(image)
    R = median_filter(r, filter_size)
    G = median_filter(g, filter_size)
    B = median_filter(b, filter_size)
    output = util.merge(R, G, B)
    return output.astype(np.uint8)
def contraharmonic_mean_filter_rgb(image, filter_size, Q=1):
    r, g, b = util.split(image)
    R = contraharmonic_mean_filter(r, filter_size, Q)
    G = contraharmonic_mean_filter(g, filter_size, Q)
    B = contraharmonic_mean_filter(b, filter_size, Q)
    output = util.merge(R, G, B)
    return output.astype(np.uint8)
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root'''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            dirname, basename = util.split(name)
            rel.append(basename)
            if dirname == name:
                break
            name = dirname

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                canonpath(root, root, myname, auditor)
                hint = (_("consider using '--cwd %s'")
                        % os.path.relpath(root, cwd))
        except util.Abort:
            pass

        raise util.Abort(_("%s not under root '%s'") % (myname, root),
                         hint=hint)
def delete(self, keys_to_delete):
    split_size = 1000
    keys_to_delete = util.split(keys_to_delete, split_size, self.__delete_wrapper)
    for set in keys_to_delete:
        params = dict({})
        params['Bucket'] = self.__bucket
        params['Delete'] = {"Objects": set}
        response = self.__client.delete_objects(**params)
def __call__(self, path, mode="r", text=False, atomictemp=False, notindexed=False): '''Open ``path`` file, which is relative to vfs root. Newly created directories are marked as "not to be indexed by the content indexing service", if ``notindexed`` is specified for "write" mode access. ''' if self._audit: r = util.checkosfilename(path) if r: raise util.Abort("%s: %r" % (r, path)) self.audit(path) f = self.join(path) if not text and "b" not in mode: mode += "b" # for that other OS nlink = -1 if mode not in ('r', 'rb'): dirname, basename = util.split(f) # If basename is empty, then the path is malformed because it points # to a directory. Let the posixfile() call below raise IOError. if basename: if atomictemp: util.ensuredirs(dirname, self.createmode, notindexed) return util.atomictempfile(f, mode, self.createmode) try: if 'w' in mode: util.unlink(f) nlink = 0 else: # nlinks() may behave differently for files on Windows # shares if the file is open. fd = util.posixfile(f) nlink = util.nlinks(f) if nlink < 1: nlink = 2 # force mktempcopy (issue1922) fd.close() except (OSError, IOError) as e: if e.errno != errno.ENOENT: raise nlink = 0 util.ensuredirs(dirname, self.createmode, notindexed) if nlink > 0: if self._trustnlink is None: self._trustnlink = nlink > 1 or util.checknlink(f) if nlink > 1 or not self._trustnlink: util.rename(util.mktempcopy(f), f) fp = util.posixfile(f, mode) if nlink == 0: self._fixfilemode(f) return fp
def delete_message_batch(self, metrics):
    start = time.time()
    msgs_to_delete = set(self.__context[c.KEY_SUCCEEDED_MSG_IDS])
    print "Total number of messages to delete is {}".format(len(msgs_to_delete))
    threadpool = self.__context[c.KEY_THREAD_POOL]
    url = self.__queue_url
    msgs_to_delete = util.split(msgs_to_delete, 10, self.__delete_wrapper)
    for msg_set in msgs_to_delete:
        threadpool.add(retry.try_with_backoff, self.__context,
                       self.__client.delete_message_batch,
                       QueueUrl=url, Entries=msg_set)
    threadpool.wait()
    return int(time.time() - start)
def separate_by_line():
    """At Mark's request, format phones as:
    <FILEID> phone1 phone2 phone3 (corresponding word) phone4 ...
    """
    destfile = path.join(outdir, 'phonbyline.txt')
    destgold = path.join(outdir, 'phongold.txt')
    fidmark = open(destfile, 'w')
    fidgold = open(destgold, 'w')
    s = []
    for pfile, wfile, _, _ in sorted(corpus.get_filesets()):
        plist = corpus.extract_content(pfile, 'phones')
        psplit = util.split(plist, lambda x: x[0] in SPLITS)
        wlist = corpus.extract_content(wfile, 'words')
        rtot = []
        for pspl in psplit:
            r = []
            for word, interval in wlist:
                phones = [x for x in pspl
                          if geq(x[1].start, interval.start)
                          and leq(x[1].end, interval.end)
                          and x[0] not in IGNORE]
                if phones:
                    r.append((word, interval, phones))
            if r:
                rtot.append(r)
        basename = path.splitext(path.basename(pfile))[0]
        for idx, split in enumerate(rtot):
            fidmark.write('{f}_{i}'.format(f=basename, i=idx))
            s.append(('{f}_{i}'.format(f=basename, i=idx), split))
            for word, interval, phones in split:
                fidmark.write(' ')
                fidmark.write(' '.join(zip(*phones)[0]))
                fidmark.write(' ')
                fidmark.write('({w})'.format(w=word))
            fidmark.write('\n')
            fidgold.write('{f}_{i}'.format(f=basename, i=idx))
            for word, interval, phones in split:
                fidgold.write(' ')
                fidgold.write(' '.join('{p} {i}'.format(p=p, i=ival)
                                       for p, ival in phones))
                fidgold.write(' ')
                fidgold.write('({w} {i})'.format(w=word, i=interval))
            fidgold.write('\n')
    fidmark.close()
    fidgold.close()
    with open(path.join(outdir, 'phongold.pkl'), 'wb') as fid:
        pickle.dump(s, fid, -1)
def conv_filter(image, kernel):
    if len(image.shape) == 2:
        return conv(image, kernel)
    else:
        r, g, b = util.split(image)
        R = conv(r, kernel)
        G = conv(g, kernel)
        B = conv(b, kernel)
        output = util.merge(R, G, B)
        return output.astype(np.uint8)
def delete(self, keys_to_delete):
    split_size = 1000
    keys_to_delete = util.split(keys_to_delete, split_size, self.__delete_wrapper)
    for set in keys_to_delete:
        params = dict({})
        params['Bucket'] = self.__bucket
        params['Delete'] = {"Objects": set}
        util.debug_print("Deleting {}".format(params))
        response = self.__client.delete_objects(**params)
        util.debug_print("Delete response {}".format(response))
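# Illustration only: the S3 and SQS snippets above rely on a util.split that
# chunks a collection into fixed-size batches (1000 keys per delete_objects
# call, 10 entries per delete_message_batch call) and applies a wrapper to each
# item. A sketch under that assumption; the real helper may differ.
def split(items, size, wrapper=lambda item: item):
    wrapped = [wrapper(item) for item in items]
    return [wrapped[i:i + size] for i in range(0, len(wrapped), size)]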
def gaussian_filter(image, filter_size, sigma):
    kernel = gaussian_kernel(filter_size, sigma)
    if len(image.shape) == 2:
        return conv(image, kernel)
    else:
        r, g, b = util.split(image)
        R = conv(r, kernel)
        G = conv(g, kernel)
        B = conv(b, kernel)
        output = util.merge(R, G, B)
        return output.astype(np.uint8)
def predict_via_iterations(xtrain, ytrain, xtest, ytest, regclass, speed, iterations, boundary):
    A, Y = csvio.load_train_data(xtrain, ytrain)
    A, Y = util.symmetric_permutation(A, Y)
    A_train, A_control = util.split(A, boundary)
    Y_train, Y_control = util.split(Y, boundary)

    regression = regclass(A_train, Y_train)
    losses, losses_control = train_via_iterations(regression, speed, iterations, A_control, Y_control)

    A_test = csvio.load_regressors(xtest)
    prediction = regression.predict(A_test)
    np.savetxt(ytest, prediction, fmt='%.5e', delimiter=',')

    plt.figure()
    plt.ylabel('loss')
    plt.xlabel('iterations')
    plt.plot(range(1, len(losses) + 1), losses, label='losses during studies')
    plt.plot(range(1, len(losses_control) + 1), losses_control, label='loss during control')
    plt.legend(loc='upper right', framealpha=0.95)
    plt.draw()
    plt.pause(0.001)
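# Assumption for predict_via_iterations: util.split(A, boundary) appears to
# separate the first `boundary` rows (training) from the remainder (control).
# A minimal sketch under that assumption:
def split(data, boundary):
    return data[:boundary], data[boundary:]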
def mean_filter(image, filter_size):
    kernel = np.ones((filter_size, filter_size)) * (1.0 / (filter_size**2))
    if len(image.shape) == 2:
        return conv(image, kernel)
    else:
        r, g, b = util.split(image)
        R = conv(r, kernel)
        G = conv(g, kernel)
        B = conv(b, kernel)
        output = util.merge(R, G, B)
        return output.astype(np.uint8)
def phongold():
    for phnfile, _, _, _ in get_filesets():
        bname = path.splitext(path.basename(phnfile))[0]
        for idx, pair in enumerate(util.split(extract_content(phnfile, 'phones'),
                                              lambda x: x[0] == '__')):
            try:
                phones, intervals = zip(*pair)
            except ValueError as e:
                print bname, pair
                raise e
            yield bname + '_{0}'.format(idx), phones, intervals
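# Illustration only: phongold, separate_by_line and format_phonrec all call
# util.split(seq, pred) as a "cut the sequence at delimiter elements" helper.
# A sketch under that assumption (delimiter elements are dropped; empty chunks
# may appear, which matches the ValueError handling above):
def split(seq, pred):
    chunks, current = [], []
    for item in seq:
        if pred(item):
            chunks.append(current)
            current = []
        else:
            current.append(item)
    chunks.append(current)
    return chunks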
def estimate(trainX, trainY, resample_num):
    sample_pos_means = []
    sample_pos_covs = []
    sample_neg_means = []
    sample_neg_covs = []
    for i in xrange(resample_num):
        [sampledX, sampledY] = util.resample(trainX, trainY)
        [positiveX, negativeX] = util.split(sampledX, sampledY)
        sample_pos_means.append(np.mean(positiveX, 0))
        sample_neg_means.append(np.mean(negativeX, 0))
        sample_pos_covs.append(np.cov(np.array(positiveX).T))
        sample_neg_covs.append(np.cov(np.array(negativeX).T))

    nominal_pos_mean = np.mean(sample_pos_means, 0)
    nominal_neg_mean = np.mean(sample_neg_means, 0)
    nominal_pos_cov = np.mean(sample_pos_covs, 0)
    nominal_neg_cov = np.mean(sample_neg_covs, 0)

    sample_pos_means_cov = np.cov(np.array(sample_pos_means).T)
    sample_neg_means_cov = np.cov(np.array(sample_neg_means).T)
    # log(sample_pos_means_cov)
    # log(sample_neg_means_cov)
    np.linalg.cholesky(sample_pos_means_cov + np.eye(sample_pos_means_cov.shape[0]) * 1e-8)
    np.linalg.cholesky(sample_neg_means_cov + np.eye(sample_neg_means_cov.shape[0]) * 1e-8)

    P_pos = np.linalg.inv(sample_pos_means_cov + np.eye(sample_pos_means_cov.shape[0]) * 1e-8) / len(trainX)
    P_neg = np.linalg.inv(sample_neg_means_cov + np.eye(sample_pos_means_cov.shape[0]) * 1e-8) / len(trainX)
    np.linalg.cholesky(P_pos + np.eye(sample_neg_means_cov.shape[0]) * 1e-3)
    np.linalg.cholesky(P_neg + np.eye(sample_neg_means_cov.shape[0]) * 1e-3)

    rho_pos = 0
    rho_neg = 0
    for cov_matrix in sample_pos_covs:
        dis = util.F_norm(cov_matrix - nominal_pos_cov)
        rho_pos = max(dis, rho_pos)
    for cov_matrix in sample_neg_covs:
        dis = util.F_norm(cov_matrix - nominal_neg_cov)
        rho_neg = max(dis, rho_neg)

    return [
        nominal_pos_mean, P_pos,
        nominal_neg_mean, P_neg,
        nominal_pos_cov, rho_pos,
        nominal_neg_cov, rho_neg
    ]
def delete_message_batch(self, metrics, queue_url=None):
    start = time.time()
    msgs_to_delete = set(metrics)
    threadpool = self.__context[c.KEY_THREAD_POOL]
    url = queue_url or self.__queue_url
    msgs_to_delete = util.split(msgs_to_delete, 10, self.__delete_wrapper)
    for msg_set in msgs_to_delete:
        threadpool.add(retry.try_with_backoff, self.__context,
                       self.__client.delete_message_batch,
                       QueueUrl=url, Entries=msg_set)
    threadpool.wait()
    return int(time.time() - start)
def prepare_data(df):
    df = u.shuffle(df, 999)
    X, Y = u.xy(df)

    scaler = preprocessing.MinMaxScaler()
    scaler.fit(X)

    df_tr, df_te = u.split(df, 0.75)
    X_tr, Y_tr = u.xy(df_tr)
    X_te, Y_te = u.xy(df_te)

    X_te_norm = scaler.transform(X_te)
    X_tr_norm = scaler.transform(X_tr)

    return X_tr_norm, Y_tr, X_te_norm, Y_te
def on_btnGenSql_clicked(self, widget):
    ''' Generate SQL (SQL 생성). '''
    tb = self.tbColEng
    mydict = util.load_file("dict.txt")
    assert isinstance(tb, gtk.TextBuffer)
    s = tb.get_text(tb.get_start_iter(), tb.get_end_iter())
    cols = util.split(s)

    # self.log(crud.mk_insert(cols))
    # self.log('\n\n')

    self.log(crud.mk_insert2(mydict, cols))
    self.log('\n\n')
    self.log(crud.mk_update(mydict, cols))
    self.log('\n\n')
    self.log(crud.mk_select(mydict, cols))
    self.log('\n\n')
def laplacian_filter(image, diagonal=True):
    if diagonal:
        kernel = np.array(([-1, -1, -1], [-1, 8, -1], [-1, -1, -1]))
    else:
        kernel = np.array(([0, 1, 0], [1, -4, 1], [0, 1, 0]))

    if len(image.shape) == 2:
        return conv(image, kernel)
    else:
        r, g, b = util.split(image)
        R = conv(r, kernel)
        G = conv(g, kernel)
        B = conv(b, kernel)
        output = util.merge(R, G, B)
        return output.astype(np.uint8)
def prepare_data(df):
    df = u.shuffle(df, 999)
    df_train, df_test = u.split(df, 0.75)
    X_train, Y_train = u.xy(df_train)
    X_test, Y_test = u.xy(df_test)

    X_train = preprocessing.maxabs_scale(X_train)
    X_test = preprocessing.maxabs_scale(X_test)

    ones = np.ones((X_train.shape[0], 1))
    X_train = np.hstack((X_train, ones))
    ones = np.ones((X_test.shape[0], 1))
    X_test = np.hstack((X_test, ones))

    return X_train, Y_train, X_test, Y_test
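# Assumption for both prepare_data variants: u.split(df, 0.75) appears to put
# the first 75% of the (already shuffled) rows in the training frame and the
# rest in the test frame. A sketch under that assumption:
def split(df, ratio):
    cut = int(len(df) * ratio)
    return df.iloc[:cut], df.iloc[cut:]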
def on_btnTransText_clicked(self, widget):
    self.log("텍스트변환시작\n")
    tb = self.tbColHan
    s = tb.get_text(tb.get_start_iter(), tb.get_end_iter())
    cols = util.split(s)

    msg = ''
    for col in cols:
        camel = util.underscore_to_camel(col.upper())
        msg += col + '\n'
        msg += util.camel_to_underscore(col).upper() + '\n'
        msg += camel + '\n'
        msg += camel[0:1].upper() + camel[1:] + '\n'
        msg += '\n'

    self.log(msg)
    self.log('\n\n')
def canonpath(root, cwd, myname, auditor=None):
    """return the canonical path of myname, given cwd and root"""
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ""
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ""
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            dirname, basename = util.split(name)
            rel.append(basename)
            if dirname == name:
                break
            name = dirname
        raise util.Abort(_("%s not under root '%s'") % (myname, root))
def __call__(self, path, mode="r", text=False, atomictemp=False): if self._audit: r = util.checkosfilename(path) if r: raise util.Abort("%s: %r" % (r, path)) self.audit(path) f = self.join(path) if not text and "b" not in mode: mode += "b" # for that other OS nlink = -1 if mode not in ("r", "rb"): dirname, basename = util.split(f) # If basename is empty, then the path is malformed because it points # to a directory. Let the posixfile() call below raise IOError. if basename: if atomictemp: util.ensuredirs(dirname, self.createmode) return util.atomictempfile(f, mode, self.createmode) try: if "w" in mode: util.unlink(f) nlink = 0 else: # nlinks() may behave differently for files on Windows # shares if the file is open. fd = util.posixfile(f) nlink = util.nlinks(f) if nlink < 1: nlink = 2 # force mktempcopy (issue1922) fd.close() except (OSError, IOError), e: if e.errno != errno.ENOENT: raise nlink = 0 util.ensuredirs(dirname, self.createmode) if nlink > 0: if self._trustnlink is None: self._trustnlink = nlink > 1 or util.checknlink(f) if nlink > 1 or not self._trustnlink: util.rename(util.mktempcopy(f), f)
def __call__(self, path, mode="r", text=False, atomictemp=False): if self._audit: r = util.checkosfilename(path) if r: raise util.Abort("%s: %r" % (r, path)) self.audit(path) f = self.join(path) if not text and "b" not in mode: mode += "b" # for that other OS nlink = -1 if mode not in ('r', 'rb'): dirname, basename = util.split(f) # If basename is empty, then the path is malformed because it points # to a directory. Let the posixfile() call below raise IOError. if basename: if atomictemp: util.ensuredirs(dirname, self.createmode) return util.atomictempfile(f, mode, self.createmode) try: if 'w' in mode: util.unlink(f) nlink = 0 else: # nlinks() may behave differently for files on Windows # shares if the file is open. fd = util.posixfile(f) nlink = util.nlinks(f) if nlink < 1: nlink = 2 # force mktempcopy (issue1922) fd.close() except (OSError, IOError), e: if e.errno != errno.ENOENT: raise nlink = 0 util.ensuredirs(dirname, self.createmode) if nlink > 0: if self._trustnlink is None: self._trustnlink = nlink > 1 or util.checknlink(f) if nlink > 1 or not self._trustnlink: util.rename(util.mktempcopy(f), f)
def __mkFrame(self, frameType=0x10, frameId=0x01,
              destAdress16=[0xFF, 0xFF], broadCastRadio=0x00, option=0x00):
    destAdress = self.__destAdress
    header = [0x7E, 0x00, 0x00, frameType, frameId]
    if frameType == 0x10:
        frame = header
        frame += destAdress + destAdress16
        frame.append(broadCastRadio)
        frame.append(option)
        for i in self.__outBuffer:
            if type(i) == int:
                frame.append(i)
            else:
                frame.append(ord(i))
        frame[1], frame[2] = split(len(frame[3:]))
        frame.append(self.__chkSum(frame))
    return frame
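# Assumption for __mkFrame and __chkSum: split(value) appears to break a 16-bit
# value into its high and low bytes (used here for the frame length field and
# the checksum's low byte). A minimal sketch under that assumption:
def split(value):
    return (value >> 8) & 0xFF, value & 0xFF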
def build_structure(session, file_path, root=None):
    if not root:
        root = get_root(session)
    print "path to root: " + path_to_root
    print "file_path: " + file_path

    path = util.path_relative_to(path_to_root, file_path)
    pathsplit = util.split(path)
    file_segment = pathsplit.pop()
    cumulative_path = ""
    root.update_hash()
    parent = root
    for segment in pathsplit:
        cumulative_path = os.path.join(cumulative_path, segment)
        folder_q = session.query(Folder) \
                          .filter(Folder.path == cumulative_path)
        if folder_q.count():
            folder = folder_q.one()
            folder.update_hash()
            parent = folder
        else:
            parent = Folder(cumulative_path, parent)
    f = File(path, parent)
    add_to_artist(session, f)
def on_btnDefMap_clicked(self, widget):
    mydict = util.load_file("dict.txt")
    tb = self.tbColHan
    assert isinstance(tb, gtk.TextBuffer)
    s = tb.get_text(tb.get_start_iter(), tb.get_end_iter())

    a = s.splitlines()
    if len(a) < 2:
        a = util.split(s)
    msg = 'SQL의 SELECT구문을 이용하기 바랍니다\n\n'
    msg += 'BufMap defValMap = new BufMap();\n'
    msg += 'defValMap\n'
    for l in a:
        l = re.sub('/\*.*\*/', '', l)
        l = l.strip()
        t = self.get_token(l)
        engname = t
        t = '"' + t + '"'
        hanname = util.eng_to_han_one(mydict, engname)
        msg += '\t\t.put(%-30s,"0")\t\t// %s\n' % (t, hanname)
    msg += '\t\t;\n'

    self.log("\n\n")
    self.log(msg)
    self.log("\n\n")
s += "</Rows>\n\n" return s if __name__ == '__main__': pass #s ="col1,col2,col3,col4,col5,col6,col7" #r = """안양농원; 정읍시 고부면 고부리 1234;1,000; 혼파;100;홍길동;정읍시 고부면 고부리""" #s ="col1,col2,col3,col4,col5,col6,col7,col8,col9" cols = 12 r = """A등급 100""" rows = util.split(r) #s = util.get_col_str(len(rows)) s= 'DISEN_SN,BSNS_YEAR,IHIDNUM,BIZRNO,TLPHON_NO,MOBLPHON_NO,EMAIL,DELNG_BANK_CODE,ACNUT_NO,ACNUT_OWNER_NM,FXNUM,RPRSNTV_ID,RPRSNTV_TLPHON_NO,RPRSNTV_MOBLPHON_NO,FARMNG_MNGMTSYS_ID' cols = s.split(',') print "BINDING INFO...\n\n"; print get_cell_bind(cols)
def __init__(self, game, event_handler):
    self.game = game
    # input
    event_handler.add_event_handlers({
        pg.MOUSEBUTTONDOWN: self.click,
        pg.MOUSEMOTION: self.set_current_from_mouse
    })
    event_handler.add_key_handlers([
        (conf.KEYS_BACK, self.quit, eh.MODE_ONDOWN),
        (conf.KEYS_NEXT, self.select, eh.MODE_ONDOWN),
    ] + [
        (ks, [(self.move, (i,))], eh.MODE_ONDOWN_REPEAT, 15, 7)
        for i, ks in enumerate(conf.KEYS_MOVE)
    ])

    game.linear_fade(*conf.LS_FADE_IN)

    # generate unlocked list
    unlocked = []
    n_stars = sum(len(lvl.get('stars', [])) for lvl in conf.LEVELS)
    got_stars = 0
    for ID, i in conf.STARS:
        if len(conf.LEVELS) > ID and len(conf.LEVELS[ID].get('stars', [])) > i:
            got_stars += 1
    secret = [i for i in xrange(len(conf.LEVELS)) if i not in conf.EXISTS]
    require = split(n_stars, len(secret))
    req = 0
    for ID, this_req in zip(secret, require):
        req += this_req
        if got_stars >= req:
            unlocked.append(ID)
    if conf.DEBUG:
        print '{0}/{1} stars; need {2}'.format(got_stars, n_stars, require)

    # generate level thumbnails
    ids = set(conf.EXISTS + unlocked)
    self.num_levels = n = len(ids)
    self.cols = cols = min(i for i in xrange(n + 1) if i * i >= n)
    self.rows = rows = n / cols + bool(n % cols)
    self.levels = levels = []
    self.level_ids = level_ids = {}
    w, h = conf.RES
    ws = split(w, cols)
    hs = split(ir(h * float(rows) / cols), rows)
    x = w_i = h_i = 0
    y = (h - sum(hs)) / 2
    draw_sfc = pg.Surface(conf.RES)
    vertical_order = []
    row = []
    vertical_order.append(row)
    for j, i in enumerate(ids):
        row.append(i)
        rect = pg.Rect((x, y, ws[w_i], hs[h_i])).inflate(-2, -2)
        # generate image
        sfc = pg.Surface(rect[2:])
        l = level.Level(game, None, i)
        l.draw(draw_sfc)
        sfc = pg.transform.smoothscale(draw_sfc, rect[2:])
        # dim or brighten surface
        if i in conf.COMPLETED_LEVELS:
            mod_sfc = pg.Surface(rect[2:]).convert_alpha()
            mod_sfc.fill(conf.LS_WON_OVERLAY)
            sfc.blit(mod_sfc, (0, 0))
        level_ids[i] = j
        levels.append((i, rect, sfc.convert()))
        # get next rect
        x += ws[w_i]
        w_i += 1
        if w_i == cols:
            row = []
            vertical_order.append(row)
            x = w_i = 0
            y += hs[h_i]
            h_i += 1

    self.last = 0
    self.last_current = self.current = None
    self.changed = False
    self.set_current_from_mouse()
    self.vertical_order = sum([[row[i] for row in vertical_order if len(row) > i]
                               for i in xrange(cols)], [])
    self.finished = False
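# Assumption for the level-select menu above: this game's split(total, n)
# appears to divide an integer into n near-equal integer parts that sum to
# total (used for the star requirements and the column/row pixel sizes).
# A sketch under that assumption:
def split(total, n):
    base, extra = divmod(total, n)
    return [base + 1 if i < extra else base for i in range(n)]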
          'REMAINING', duration_to_string(remaining_time),
          'TOTAL', duration_to_string(total_time))

    frogged_filename = util.filename_without_extension(filename, '.txt')
    with open(OUTPUT_FOLDER + frogged_filename + '.frog.out', 'w') as f:
        f.write(output)


if __name__ == '__main__':
    INPUT_FOLDER = '../data/plaintext/'
    OUTPUT_FOLDER = '../data/frogged/'

    files = util.todo_filepaths(INPUT_FOLDER, '.txt', OUTPUT_FOLDER, '.frog.out')
    files = sorted(files)[::-1]

    if os.path.exists('../data/frog_todo.p'):
        print("USING FROG TODO!")
        with open('../data/frog_todo.p', 'rb') as f:
            files = pickle.load(f)
        files = [s.replace('\\', '/') for s in files]

    n_processes = 2
    print("N_CPU", util.CPU_COUNT, " N PROCESSES", n_processes)

    file_chunks = util.split(files, n_processes)
    pool = Pool(processes=n_processes)
    pool.map(frog_process_files, file_chunks)
    # close() must come before join(), otherwise join() raises ValueError
    pool.close()
    pool.join()
def to_func(smpls, mtd):
    buf = cStringIO.StringIO()
    if C.mod.GN in mtd.mods:
        buf.write(C.mod.GN + ' ')
    elif C.mod.HN in mtd.mods:
        buf.write(C.mod.HN + ' ')
    ret_ty = trans_ty(mtd.typ)
    cname = unicode(repr(mtd.clazz))
    mname = mtd.name
    arg_typs = mtd.param_typs
    buf.write(ret_ty + ' ' + trans_mname(cname, mname, arg_typs) + '(')

    @takes(tuple_of(unicode))
    @returns(unicode)
    def trans_param((ty, nm)):
        return ' '.join([trans_ty(ty), nm])

    # for instance methods, add "this" pointer into parameters
    if mtd.is_static:
        params = mtd.params[:]
    else:
        self_ty = trans_ty(unicode(repr(mtd.clazz)))
        params = [(self_ty, C.SK.self)] + mtd.params[:]

    # add "logging" flag into parameters
    # to check log conformity only if invocations cross the boundary
    if not mtd.is_init and not mtd.is_clinit:
        params.append((C.SK.z, u"logging"))

    if len(params) > 0:
        buf.write(", ".join(map(trans_param, params)))
    buf.write(") {\n")

    # once function signature is dumped out, remove "logging" flag
    if not mtd.is_init and not mtd.is_clinit:
        params.pop()

    clss = util.flatten_classes([mtd.clazz], "subs")
    logged = (not mtd.is_init) and sample.mtd_appears(smpls, clss, mtd.name)

    mid = unicode(repr(mtd))
    m_ent = mid + "_ent()"
    m_ext = mid + "_ext()"
    if logged:
        global _mids
        _mids.add(mid)

    if logged:  # logging method entry (>)
        _log_params = map(log_param, params)
        _retrievals, _hashes = util.split([(u'', m_ent)] + _log_params)
        ent_retrievals = util.ffilter(_retrievals)
        ent_hashes = util.ffilter(_hashes)
        buf.write("""{} int[P] __params = {{ {} }}; if (logging) check_log@log(__params); """.format(
            u''.join(ent_retrievals), u", ".join(ent_hashes)))

    is_void = C.J.v == mtd.typ
    if mtd.body:
        if not is_void and not mtd.is_init:
            bodies = mtd.body[:-1]  # exclude the last 'return' statement
        else:
            bodies = mtd.body
        buf.write('\n'.join(map(partial(trans_s, mtd), bodies)))

    if logged:  # logging method exit (<)
        _log_params = []
        if mtd.body and not is_void and not mtd.is_init:
            ret_v = mtd.body[-1].e
            ret_u = unicode(trans_e(mtd, ret_v))
            # retrieve the return value to a temporary variable
            buf.write(u""" {} __ret = {}; """.format(ret_ty, ret_u))
            # then, try to obtain a hash from that temporary variable
            _log_params.append(log_param((ret_ty, u"__ret")))
        _retrievals, _hashes = util.split([(u'', m_ext)] + _log_params)
        ext_retrievals = util.ffilter(_retrievals)
        ext_hashes = util.ffilter(_hashes)
        buf.write("""{} __params = {{ {} }}; if (logging) check_log@log(__params); """.format(
            u''.join(ext_retrievals), u", ".join(ext_hashes)))

    if mtd.body and not is_void and not mtd.is_init:
        buf.write(os.linesep)
        if logged:
            # return the return value stored at the temporary variable
            buf.write("return __ret;")
        else:
            buf.write(trans_s(mtd, mtd.body[-1]))

    if mtd.is_init:
        evt_srcs = map(util.sanitize_ty, sample.evt_sources(smpls))
        cname = unicode(repr(mtd.clazz))
        if cname in evt_srcs:
            global _inits
            _inits.add(cname)
        buf.write("\nreturn {};".format(C.SK.self))

    buf.write("\n}\n")
    return buf.getvalue()
def get_cols_han(self):
    tb = self.tbColHan
    assert isinstance(tb, gtk.TextBuffer)
    s = tb.get_text(tb.get_start_iter(), tb.get_end_iter())
    cols = util.split(s)
    return cols
s += "</Band>\n" s += "\n" return s if __name__ == '__main__': pass #head = "등급,생산량(kg)" head = "등급 자부담비율(%) 건초->사일리지 변환 가중치 톤당 지원금액" # 탭이 있으면, 탭으로 분리하도록 처리함. ahead = util.split(head) body = util.get_col_str(len(ahead)) abody = body.split(",") print get_columns_bylen(ahead) # head band print get_band_head(ahead)
def mainRobustFisherLDAtest(dataset, alpha, resample_num=100, split_token=','):
    data_file = dataset + '/' + dataset + '.data'
    data_loader = load.loader(file_name=data_file, split_token=split_token)
    [dataX, dataY] = data_loader.load()
    dimension = data_loader.dimension

    [trainX, trainY, testX, testY] = util.divide(dataX, dataY, alpha)
    [pos_mean, pos_P, neg_mean, neg_P, pos_cov, pos_rho, neg_cov, neg_rho] = estimate(trainX, trainY, resample_num)

    M = pos_cov + neg_cov + np.eye(dimension) * (pos_rho + neg_rho)
    M0 = np.linalg.inv(M)
    # minus = np.concatenate((np.eye(dimension), -np.eye(dimension)), axis=1)
    # choose_pos = np.concatenate((np.eye(dimension), np.zeros([dimension, dimension])), axis=1)
    # choose_neg = np.concatenate((np.zeros([dimension, dimension]), np.eye(dimension)), axis=1)
    # M0 = np.dot(minus.T, np.dot(M, minus))
    # M1 = np.dot(choose_pos.T, np.dot(pos_P, choose_pos))
    # M2 = np.dot(choose_neg.T, np.dot(neg_P, choose_neg))
    # sol = QCQP.qcqprel(P={'P0': matrix(M0), 'b0': None, 'c0': 0.0},
    #                    G={'P': [matrix(M1), matrix(M2)], 'b': [None] * 2, 'c': [0.0, ] * 2,
    #                       'Peq': [], 'beq': [], 'ceq': []})
    # sol_array = np.array(sol['RQCQPx'])
    # x_pos_star = sol_array[:dimension]
    # x_neg_star = sol_array[dimension:]
    # w = np.dot(M, x_pos_star - x_neg_star)

    M1 = pos_P
    M2 = neg_P
    [train_pos_X, train_neg_X] = util.split(trainX, trainY)
    k1 = np.mean(train_pos_X, axis=0).reshape(dimension, 1)
    k2 = np.mean(train_neg_X, axis=0).reshape(dimension, 1)
    k1 = k1 / np.linalg.norm(k1)
    k2 = k2 / np.linalg.norm(k2)
    k1_norm = util.M_norm(M1, k1)
    k2_norm = util.M_norm(M2, k2)
    x1 = k1 / k1_norm
    x2 = k2 / k2_norm
    pos_mean = pos_mean.reshape(dimension, 1)
    neg_mean = neg_mean.reshape(dimension, 1)

    while True:
        tail = np.dot(M0, x1 - x2 + pos_mean - neg_mean)
        k1_head = (np.eye(dimension) * k1_norm ** 2 - np.dot(M1, np.dot(k1, k1.T))) / (k1_norm ** 3)
        k2_head = -(np.eye(dimension) * k2_norm ** 2 - np.dot(M2, np.dot(k2, k2.T))) / (k2_norm ** 3)
        k1_gradient = np.dot(k1_head, tail)
        k2_gradient = np.dot(k2_head, tail)
        k1 -= k1_gradient * 0.01
        k2 -= k2_gradient * 0.01
        # print ('%.9f\t %.9f\t %.9f\t %.9f \t%.9f') % (util.M_norm(M0, x1 + pos_mean - x2 - neg_mean), np.linalg.norm(np.concatenate((k1_gradient, k2_gradient), axis=0)), util.M_norm(M1, x1), util.M_norm(M2, x2), util.F_norm(x1 + pos_mean - x2 - neg_mean))
        if np.linalg.norm(np.concatenate((k1_gradient, k2_gradient), axis=0)) < 1e-5:
            break
        k1_norm = util.M_norm(M1, k1)
        k2_norm = util.M_norm(M2, k2)
        x1 = k1 / k1_norm
        x2 = k2 / k2_norm

    w = np.dot(M0, x1 - x2 + pos_mean - neg_mean).reshape(dimension)

    train_pos_mean = np.mean(train_pos_X, axis=0)
    train_neg_mean = np.mean(train_neg_X, axis=0)
    threshold = np.dot(w, (train_pos_mean + train_neg_mean) / 2.0)
    positive_lower = True if np.dot(train_pos_mean - train_neg_mean, w) > 0 else False

    predict = np.zeros(len(testY))
    testNum = len(testY)
    for i in xrange(testNum):
        value = np.dot(testX[i], w)
        if (value > threshold) == positive_lower:
            predict[i] = 1
        else:
            predict[i] = -1

    rightNum = 0
    for i in xrange(testNum):
        if predict[i] == testY[i]:
            rightNum += 1

    # print 'Right Ratio: %.5f' % (float(rightNum) / float(testNum))
    return float(rightNum) / float(testNum)
def takeModifiees(index):
    isHit = lambda key: isinstance(key, Hit)
    modifiees, self.undet[index:] = util.split(self.undet[index:], isHit)
    return modifiees
    return s


def mk_select(mydict, cols):
    s = ""
    s += "SELECT \n"
    for col in cols:
        col = col.strip()
        hanname = util.eng_to_han_one(mydict, col)
        col += ','
        s += "\t{col:25} /* {hanname} */\n".format(col=col, hanname=hanname)
    s += "\nFROM table"
    s += "\nWHERE 1=1"
    s = util.rm_last_one_char(s, ',')
    return s


if __name__ == "__main__":
    c = 'DISEN_SN,BSNS_YEAR,IHIDNUM,BIZRNO,TLPHON_NO,MOBLPHON_NO,EMAIL,DELNG_BANK_CODE,ACNUT_NO,ACNUT_OWNER_NM,FXNUM,RPRSNTV_ID,RPRSNTV_TLPHON_NO,RPRSNTV_MOBLPHON_NO,FARMNG_MNGMTSYS_ID'
    c = 'BSNS_YEAR,AGBS_CODE,VILAGE_REQST_SN,CMPTINST_CODE,CMPTNC_LWPRT_INSTT_CODE,VILAGE_NM,RPRSN_TV_NM,RPRSN_TV_ADRES_CODE,RPRSN_TV_ADRES_DETAIL,RPRSN_TV_RN_ADRES_CODE,RPRSN_TV_EMD_SN,RPRSN_TV_RN_ADRES_DETAIL,REQST_DE,REQST_STTUS_CODE,CHARGER_OPINION,USE_AT,LEGALDONG_CODE,REGISTER_ID,REGIST_DT,UPDT_DT,UPDUSR_ID'
    cols = util.split(c)
    mydict = util.load_file("dict.txt")

    print ""
    print mk_insert(mydict, cols)
    print ""
    print mk_insert2(mydict, cols)
    print ""
    print mk_update(mydict, cols)
    print ""
    print mk_select(mydict, cols)
def __init__(self, *args, **kwargs):
    for u in self.url:
        exec "cfg.html_root.%s = self" % "%s.%s" % ('.'.join(u.split("/")[1:]), self.__class__.__name__)
        cfg.log("Registered page: %s.%s" % ('.'.join(u.split("/")[1:]), self.__class__.__name__))
import matplotlib.pyplot as plt
from pandas import DataFrame

if len(sys.argv) < 2:
    print 'Usage: python plotData2.py <data_file> (Adim - Bdim)'

[X, Y] = load.loader(sys.argv[1]).load()
start = 0
dimension = len(X[0])
if len(sys.argv) == 3:
    parts = sys.argv[2].split('-')
    start = int(parts[0])
    dimension = int(parts[1]) - start + 1
row = int(math.floor(math.sqrt(dimension - 1))) + 1

[posX, negX] = util.split(X, Y)
print 'posNum = %d, negNum = %d' % (len(posX), len(negX))

if len(posX) < len(negX):
    gap = len(negX) - len(posX)
    for i in xrange(gap):
        pickup = random.randint(0, len(posX) - 1)
        posX.append(posX[pickup])
else:
    gap = len(posX) - len(negX)
    for i in xrange(gap):
        pickup = random.randint(0, len(negX) - 1)
        negX.append(negX[pickup])

posX = np.array(posX)
negX = np.array(negX)
def cost(word, guess, guessHistory):
    found = util.findStr(util.split(word), guess)
    if len(found) > 0 or guess in guessHistory:
        return 0
    return 1
def format_phonrec():
    # load up the phone recognizer files
    phonrecdir = path.join(os.environ['HOME'],
                           'data/output/lrec_buckeye/phonrecoutput',
                           'res_tri200_simple_S_bigr/')
    phonrecinput = path.join(phonrecdir, 'refTranscriptions.mlf')
    phonrecoutput = path.join(phonrecdir, 'modelTranscriptions.mlf')
    prinput = corpus.readmlf(phonrecinput)
    proutput = corpus.readmlf(phonrecoutput)

    # index the input and output by filename
    prinput = {x[0]: (x[1], x[2]) for x in prinput}
    proutput = {x[0]: (x[1], x[2]) for x in proutput}

    # load up the gold transcriptions
    # with open(path.join(outdir, 'phongold.pkl'), 'rb') as fid:
    #     gold = pickle.load(fid)

    # build dict from fname to phoneseq to start time
    phondict = defaultdict(dict)
    worddict = defaultdict(list)
    # for e in gold:
    #     fname = e[0].split('_')[0]
    #     phonseq = tuple(map(corpus.fold, reduce(lambda x, y: x+y,
    #                                             (zip(*p)[0] for p in zip(*e[1])[2]))))
    #     start_time = e[1][0][1].start
    #     phondict[fname][phonseq] = start_time
    #     for word, interval in zip(zip(*e[1])[0], zip(*e[1])[1]):
    #         worddict[fname].append((word, interval))
    for pfile, wfile, _, _ in corpus.get_filesets():
        fname = path.splitext(path.basename(pfile))[0]
        plist = corpus.extract_content(pfile, 'phones', True)
        for s in util.split(plist, lambda x: x[0] == '__'):
            if s == []:
                continue
            phonseq, intervals = zip(*s)
            phondict[fname][tuple(phonseq)] = intervals[0].start
        wlist = corpus.extract_content(wfile, 'words')
        for word, interval in wlist:
            worddict[fname].append((word, interval))

    ns2sec = 0.0000001
    markfile = path.join(outdir, 'phonrec.txt')
    goldfile = path.join(outdir, 'phonrecgold.txt')
    fidmark = open(markfile, 'w')
    fidgold = open(goldfile, 'w')

    # now line them up
    rtot = []
    missing = 0
    for fname in sorted(proutput.keys()):
        bname = fname.split('_')[0]
        gold_phones, _ = prinput[fname]
        pred_phones, pred_intervals = proutput[fname]
        start_time = phondict[bname][tuple(gold_phones)]
        pred_intervals = [corpus.Interval(start_time + i.start * ns2sec,
                                          start_time + i.end * ns2sec)
                          for i in pred_intervals]
        words, word_intervals = zip(*worddict[bname])
        wrd_idx = 0
        phn_idx = 0
        # find the starting word
        for i in range(0, len(words)):
            if geq(word_intervals[i].end, pred_intervals[phn_idx].start) \
                    and leq(word_intervals[i].start, pred_intervals[phn_idx].end):
                wrd_idx = i
                break
        else:
            for word, interval in zip(words, word_intervals):
                print interval, word
            print 'PHN:', pred_intervals[phn_idx]
            print fname
            print start_time
            raise ValueError('no suitable word interval found')
        r = [(words[wrd_idx], word_intervals[wrd_idx], [])]
        r_idx = 0
        while phn_idx < len(pred_phones):
            if leq(word_intervals[wrd_idx].end, pred_intervals[phn_idx].end):
                wrd_idx += 1
                if wrd_idx >= len(words):
                    break
                r.append((words[wrd_idx], word_intervals[wrd_idx], []))
                r_idx += 1
            r[r_idx][2].append((pred_phones[phn_idx], pred_intervals[phn_idx]))
            phn_idx += 1
        rtot.append(r)
        fidmark.write(fname)
        for word, interval, plist in r:
            if plist == []:
                continue
            fidmark.write(' ')
            fidmark.write(' '.join(zip(*plist)[0]))
            fidmark.write(' ')
            fidmark.write('({w})'.format(w=word))
        fidmark.write('\n')
        fidgold.write(fname)
        for word, interval, plist in r:
            if plist == []:
                continue
            fidgold.write(' ')
            fidgold.write(' '.join('{p} {i}'.format(p=p, i=ival)
                                   for p, ival in plist))
            fidgold.write(' ')
            fidgold.write('({w} {i})'.format(w=word, i=interval))
        fidgold.write('\n')
    fidmark.close()
    fidgold.close()
    with open(path.join(outdir, 'phonrecoutgold.pkl'), 'wb') as fid:
        pickle.dump(rtot, fid, -1)
    print 'MISSING:', missing
def tokenize_text(self, text):
    "Runs the lexer on text and returns a list of lists of tokens"
    return map(self.tokenize_line, util.split(text))
def __chkSum(self, data):
    chkSum = 0
    for i in data[3:]:
        chkSum += i
    chkSum = split(0xFF - chkSum)[1]
    return chkSum