def update(args):
    """Run the requested update action over all matching archive products.

    Builds a muninn search expression restricted to products that physically
    exist in the archive, runs the search, and applies the action either via a
    multiprocessing pool or sequentially with a progress bar. Returns 0.
    """
    # Restrict to products that are physically present in the archive.
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression
    if args.action == 'pull':
        # Pulling only makes sense for products that carry a remote_url.
        if expression:
            expression = "is_defined(remote_url) and (%s)" % expression
        else:
            expression = "is_defined(remote_url)"
    processor = UpdateProcessor(args)
    with muninn.open(args.archive) as archive:
        if args.action in ('ingest', 'pull'):
            # Only uuid and product_name are needed for these actions.
            products = archive.search(expression,
                                      property_names=['uuid', 'product_name'])
        else:
            products = archive.search(expression,
                                      namespaces=archive.namespaces())
        if args.parallel:
            worker_pool = (multiprocessing.Pool(args.processes)
                           if args.processes is not None
                           else multiprocessing.Pool())
            # list() drains the imap iterator so all work actually runs.
            list(bar(worker_pool.imap(processor, products),
                     total=len(products)))
            worker_pool.close()
            worker_pool.join()
        else:
            for product in bar(products):
                processor.perform_operation(archive, product)
    return 0
def ingest(args):
    """Ingest the given paths into the archive.

    When "-" appears among args.path, one path per line is read from stdin
    instead. Returns 0 when every path was ingested successfully, 1 otherwise.
    """
    processor = IngestProcessor(args)
    with muninn.open(args.archive) as archive:
        if "-" in args.path:
            # BUGFIX: lines read from sys.stdin keep their trailing newline;
            # strip it so downstream filesystem operations get a clean path.
            paths = [line.rstrip('\n') for line in sys.stdin]
        else:
            paths = args.path
        total = len(paths)
        num_success = 0
        if args.parallel:
            if args.processes is not None:
                pool = multiprocessing.Pool(args.processes)
            else:
                pool = multiprocessing.Pool()
            # imap yields per-path success flags; sum counts the successes.
            num_success = sum(list(bar(pool.imap(processor, paths), total=total)))
            pool.close()
            pool.join()
        elif total > 1:
            for path in bar(paths):
                num_success += processor.perform_operation(archive, path)
        elif total == 1:
            # don't show progress bar if we ingest just one item
            num_success = processor.perform_operation(archive, paths[0])
    return 0 if num_success == total else 1
def update(args):
    """Update archived products selected by the command-line expression.

    Collects requested namespaces, narrows the search to products present in
    the archive (and, for 'pull', to those with a remote_url), then applies
    the Processor to each match, in parallel or sequentially. Returns 0.
    """
    # Only consider products that actually exist in the archive.
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression
    namespaces = []
    if args.namespaces:
        for entry in args.namespaces:
            namespaces.extend(entry.split(' '))
    if args.action == 'pull':
        # only get products with a remote_url
        expression = ("is_defined(remote_url) and (%s)" % expression
                      if expression else "is_defined(remote_url)")
    with muninn.open(args.archive) as archive:
        products = archive.search(expression, namespaces=namespaces)
        if args.parallel:
            pool = multiprocessing.Pool()
            # Drain the iterator so every product is processed.
            list(bar(pool.imap(Processor(args), products),
                     total=len(products)))
            pool.close()
            pool.join()
        else:
            apply_update = Processor(args, archive)
            for product in bar(products):
                apply_update(product)
    return 0
def decrypt(self, cipher_data, priv_key, output):
    """Decrypt RSA cipher text produced by the matching encrypt().

    cipher_data format: "<msg_len>::<block_size>::<c1,c2,...>" — the header
    carries the original message length and block size needed for decoding.
    Returns the reassembled plain text from the superclass helper.
    """
    self.buf = cipher_data.split('::')
    self.msg_len, self.block_size, self.cipher_blocks = int(
        self.buf[0]), int(self.buf[1]), self.buf[2].split(',')
    self.priv_key = self.split_key(priv_key)
    # RSA decryption per block (doc fixed: the code uses priv_key[1] as the
    # modulus N and priv_key[2] as the private exponent D):
    #   cipher block C, plain block M  ->  M = C^D mod N
    self.integer_blocks = []
    # Hoist the loop-invariant int() conversions of the key parts.
    modulus = int(self.priv_key[1])
    exponent = int(self.priv_key[2])
    pbar = bar(desc="[Info] Decrypting")
    for block in self.cipher_blocks:
        pbar.update(1)
        self.integer_blocks.append(pow(int(block), exponent, modulus))
    pbar.close()
    # Call disassemble method to turn integer blocks back into plain text.
    return super()._disassemble_blocks(self.msg_len, self.block_size,
                                       self.integer_blocks)
def encrypt(self, raw_data, pub_key, output, block_size):
    """RSA-encrypt raw_data with the given public key.

    pub_key parts: [0] key size in bits, [1] modulus N, [2] exponent E.
    Each plain-text block M becomes C = M^E mod N; returns the cipher blocks.
    """
    self.block_size = block_size
    # Split public key into key parts
    self.pub_key = self.split_key(pub_key)
    stdout.write("[Info] Formatting blocks...\n")
    # Refuse keys below the supported minimum size.
    if int(self.pub_key[0]) < 1024:
        stdout.write("[Error] Minimum keysize supported is 1024\n")
        exit()
    self.cipher_blocks = []
    pbar = bar(desc="[Info] Encrypting")
    for chunk in super()._assemble_raw_blocks(raw_data):
        pbar.update(1)
        cipher = pow(int(chunk), int(self.pub_key[2]), int(self.pub_key[1]))
        self.cipher_blocks.append(cipher)
    pbar.close()
    return self.cipher_blocks
def genPrime(s):
    """Return a random s-bit prime (drawn from [2^(s-1), 2^s)), with progress bar."""
    pbar = bar(desc="[Info] Generating prime pair")
    while True:
        pbar.update(1)
        # Draw a candidate with the top bit set; retry until it is prime.
        candidate = randrange(2 ** (s - 1), 2 ** s)
        if not isPrime(candidate):
            continue
        pbar.close()
        return candidate
def process(self, archive, args, items):
    """Apply perform_operation to every item.

    Runs via the shared module-level pool when args.parallel is set, otherwise
    sequentially (with a progress bar only when there is more than one item).
    Returns 0 if all items succeeded, 1 otherwise.
    """
    total = len(items)
    num_success = 0
    if args.parallel:
        # Each imap result is a success flag; summing counts the successes.
        num_success = sum(list(bar(_POOL.imap(self, items), total=total)))
        _POOL.close()
        _POOL.join()
    elif total == 1:
        # don't show progress bar if we ingest just one item
        num_success = self.perform_operation(archive, items[0])
    elif total > 1:
        for item in bar(items):
            num_success += self.perform_operation(archive, item)
    return 0 if num_success == total else 1
def run(env, agent, n_episodes=200, render=False, jupyter=False):
    """Run the agent on env for n_episodes; return the list of episode scores."""
    episode_scores = []
    print(f"####### Training Environment {env}, episodes {n_episodes} #######")
    # Use the notebook-friendly bar inside Jupyter, the console bar otherwise.
    pbar = jbar(range(n_episodes)) if jupyter else bar(range(n_episodes))
    for episode in pbar:
        episode_score = run_episode(env, agent, render)
        episode_scores.append(episode_score)
        # Show the latest score on the progress bar itself.
        pbar.set_description(f"Episode: {episode + 1}, Score: {episode_score}")
    return episode_scores
dataset = "DUC06" # path = "/home/orkan/Dropbox/measurements/sherlock/naive adaptive k/" + dataset + "/" path = "/home/orkan/Dropbox/measurements/sherlock/naive adaptive k 2/" + dataset + "/" topics = get_sorted_topics(path) labels = ["0", "0.05", "0.1", "0.15", "0.2", "0.25"] # labels = ["0", "0.25", "0.5", "0.75", "1"] data = {r: {'k': [], 't': [], 'k_norm': [], 'no_of_iterations': []} for r in labels} atts = ['k', 't'] iterations = 10 ############################################################## for topic in bar(topics): reader = MeasurementReader() if topic.startswith("D0611B"): continue # k Bereich pro adaptive k # durchschn. k pro adaptive k # zeit pro Iteration, durchschn. read_logs(reader, path, topic) reader.read_corpora_stats(stat_folder) reader.set_topic_rid() corpus_size = reader.get_corpus_stat("Corpus Size after") for k in reader.run_log['k']: for att in atts:
import pandas as pd
import pickle
from tqdm import tqdm as bar

# Scrape the rhyme table from ytenx.org (259 pages) into one DataFrame and
# pickle it. BUGFIX: DataFrame.drop no longer accepts a positional `axis`
# argument (removed in pandas 2.0) — index/columns are now passed by keyword.
df = pd.read_html('http://ytenx.org/kyonh/sieux?page=' + str(1))[0]
# The first row of each scraped table holds the column names.
df.columns = df.iloc[0]
df = df.drop(index=0)
df_list = [df]
for i in bar(range(2, 260)):
    data = pd.read_html('http://ytenx.org/kyonh/sieux?page=' + str(i))[0]
    data.columns = data.iloc[0]
    data = data.drop(index=0)
    df_list.append(data)
df = pd.concat(df_list)
df = df.drop(columns='次序')  # drop the running-number column
df = df.T
df.columns = df.iloc[0]
df = df.drop(index='小韻')
# BUGFIX: close the output file deterministically instead of leaking the handle.
with open('./pickles/rhyme_list.pkl', 'wb') as fh:
    pickle.dump(df, fh)
def run_grid(f_lifes, f_falses, Ns, sample, model, sample_args=(), model_args=(),
             test=tests.MannWhitney, N_runs=500, log=True, filename=None,
             do_bar=True, N_proc=1):
    """Compute typical p-values over a grid of (f_life, f_false, N) combinations.

    For each grid cell, draws N_runs samples from `sample`/`model`, applies the
    statistical `test`, and records the mean and std deviation of the (log10)
    p-values plus the number of NaN runs. Optionally pickles the result grids
    to `filename`. Returns (p, sig_p, nans), each of shape
    (len(f_lifes), len(f_falses), len(Ns)).
    """
    # Print the current model/sample/test
    try:
        print("Running: MODEL: {:s} SAMPLE: {:s} TEST: {:s}".format(
            model_names[model.__name__], sample_names[sample.__name__],
            tests.test_names[test.__name__]))
    except:
        # Best-effort banner only: missing name-table entries are ignored.
        pass
    # Generates the p value grid for the specific model, sample, and statistical test
    p, sig_p, nans = np.zeros((len(f_lifes), len(f_falses), len(Ns)), dtype=float), np.zeros(
        (len(f_lifes), len(f_falses), len(Ns)), dtype=float), np.zeros(
        (len(f_lifes), len(f_falses), len(Ns)), dtype=int)
    # bar0 is either the progress-bar wrapper or an identity passthrough.
    if do_bar:
        bar0 = bar
    else:
        def bar0(arg):
            return arg

    # Calculates and saves the typical p-value and std deviation for a single
    # combination of f_life, f_false, N. Mutates p, sig_p and nans in place.
    def run_bin(i, j, k, p, sig_p, nans):
        p0 = np.zeros(N_runs, dtype=float)
        for ii in range(N_runs):
            ages, O2 = draw_sample(f_lifes[i], f_falses[j], int(Ns[k]), sample,
                                   model, sample_args=sample_args,
                                   model_args=model_args)
            # The test needs both positive and negative O2 detections;
            # otherwise record NaN for this run.
            if O2.sum() > 0 and (~O2).sum() > 0:
                p0[ii] = test(ages, O2, p_only=True)
            else:
                p0[ii] = np.nan
        # Get the mean and std deviation for this bin and count the number of nans
        if (~np.isnan(p0)).sum() > 0:
            if log:
                p[i, j, k], sig_p[i, j, k] = np.nanmean(np.log10(p0)), np.nanstd(
                    np.log10(p0))
            else:
                p[i, j, k], sig_p[i, j, k] = np.nanmean(p0), np.nanstd(p0)
        else:
            p[i, j, k], sig_p[i, j, k] = np.nan, np.nan
        nans[i, j, k] = np.isnan(p0).sum()

    # Get every combination of f_life,f_false,N
    combos = []
    for i in range(len(f_lifes)):
        for j in range(len(f_falses)):
            for k in range(len(Ns)):
                combos.append((i, j, k))
    # Run each combo sequentially
    # NOTE(review): when N_proc != 1 no branch runs the combos at all and the
    # grids come back zero-filled — a parallel path may exist elsewhere; verify.
    if N_proc == 1:
        for combo in bar(combos):
            run_bin(*combo, p, sig_p, nans)
    # Save the results to a specified file
    if filename is not None:
        try:
            pkl = {}
            pkl['f_life'], pkl['f_false'], pkl['N'], pkl['nans'] = np.array(
                f_lifes), np.array(f_falses), np.array(Ns), np.array(nans)
            # Key names record whether the stored values are log10 p or raw p.
            if log:
                pkl['logp'], pkl['sig_logp'] = p, sig_p
            else:
                pkl['p'], pkl['sig_p'] = p, sig_p
            if not os.path.exists(ROOT_DIR + '/results'):
                os.mkdir(ROOT_DIR + '/results')
            pickle.dump(pkl, open(filename, 'wb'))
            print("Saved {:s}".format(filename))
        except:
            # Saving is best-effort; a failure must not discard the results.
            print("Error saving file: {:s}".format(filename))
    return p, sig_p, nans
# NOTE(review): this chunk starts mid-definition — the enclosing function
# header (presumably report(pred, truth, csv_table, clf_name)) is outside the
# visible range; pred/truth/csv_table/clf_name are its parameters.
label = 'E'
pred_lab = VecContext.y2lab(pred)
truth_lab = VecContext.y2lab(truth)
# Sequence-labelling metrics for the positive label 'E'.
P = metrics.flat_precision_score(truth_lab, pred_lab, pos_label=label)
R = metrics.flat_recall_score(truth_lab, pred_lab, pos_label=label)
f1 = metrics.flat_f1_score(truth_lab, pred_lab, pos_label=label)
print(clf_name)
print(
    metrics.flat_classification_report(truth_lab, pred_lab, labels=('I', 'E'),
                                       digits=4))
csv_table.writerow([clf_name, P, R, f1])

# Fit and evaluate each classifier on the shared train/test split.
for clf_name, clf in bar(clf_list):
    clf.fit(x_train, y_train)
    pred_vec = clf.predict(x_test)
    report(pred_vec, y_test, result_table, clf_name)
# lgb
# Carve a validation set out of the training data for LightGBM.
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train,
                                                      test_size=0.1,
                                                      shuffle=True)
train_data = lgb.Dataset(x_train, y_train)
valid_data = lgb.Dataset(x_valid, y_valid, reference=train_data)
# NOTE(review): this params dict continues beyond the visible chunk.
params = {
    'task': 'train',
# NOTE(review): this chunk starts mid-list — the opening bracket and earlier
# entries of this column-name list are outside the visible range.
"ПЛОЩАДЬ ПРОЧИХ ХОЛОДНЫХ ПОМЕЩЕНИЙ", "ПЛОЩАДЬ НЕЖИЛАЯ",
"ПЛОЩАДЬ ОБЩАЯ НЕЖИЛЫХ ПОДВАЛОВ", "ПЛОЩАДЬ ОБЩАЯ НЕЖИЛЫХ ЦОКОЛЬНЫХ",
"ПЛОЩАДЬ ОБЩАЯ ЖИЛЫХ ПОМЕЩЕНИЙ", "ПЛОЩАДЬ ЖИЛАЯ ЖИЛЫХ ПОМЕЩЕНИЦ",
"КОЛИЧЕСТВО ЖИЛЫХ ПОМЕЩЕНИЙ", "КОЛИЧЕСТВО ЖИЛЫХ КОМНАТ",
"КОЛИЧЕСТВО ОДНОКОМНАТНЫХ КВАРТИО", "ОБЩАЯ ПЛОЩАДЬ ОДНОКОМНАТНЫХ КВАРТИР",
"ЖИЛАЯ ПЛОЩАДЬ ОДНОКОМНАТНЫХ КВАРТИР", "КОЛИЧЕСТВО ДВУХКОМНАТНЫХ КВАРТИО",
"ОБЩАЯ ПЛОЩАДЬ ДВУХКОМНАТНЫХ КВАРТИР", "ЖИЛАЯ ПЛОЩАДЬ ДВУХКОМНАТНЫХ КВАРТИР",
"КОЛИЧЕСТВО ТРЕХКОМНАТНЫХ КВАРТИО", "ОБЩАЯ ПЛОЩАДЬ ТРЕХКОМНАТНЫХ КВАРТИР",
"ЖИЛАЯ ПЛОЩАДЬ ТРЕХКОМНАТНЫХ КВАРТИР", "КОЛИЧЕСТВО ЧЕТЫРЕХКОМНАТНЫХ КВАРТИО",
"ОБЩАЯ ПЛОЩАДЬ ЧЕТЫРЕХКОМНАТНЫХ КВАРТИР",
"ЖИЛАЯ ПЛОЩАДЬ ЧЕТЫРЕХКОМНАТНЫХ КВАРТИР", "КОЛИЧЕСТВО ПЯТИКОМНАТНЫХ КВАРТИР",
"ОБЩАЯ ПЛОЩАДЬ ПЯТИКОМНАТНЫХ КВАРТИР", "ЖИЛАЯ ПЛОЩАДЬ ПЯТИКОМНАТНЫХ КВАРТИР",
"ВСЕГО КВАРТИР КОЛИЧЕСТВО", "ВСЕГО В КВАРТИРАХ КОМНАТ",
"ВСЕГО В КВАРТИРАХ ПЛОЩАДЬ ОБЩАЯ", "ВСЕГО В КВАРТИРАХ ПЛОЩАДЬ ЖИЛАЯ",
"СЕРИЯ ПРОЕКТА", "КОЛИЧЕСТВО БАССЕЙНОВ", "ПРИЗНАК СТУДЕНТЧЕСКОГО ОБЩЕЖИТИЯ",
"СЧИТАТЬ ОТДЕЛЬНЫМ КОРПУСОМ", "НАЛИЧИЕ МАНСАРДЫ", "UNOM 2"
]
# For every non-NaN match index in true_ans, copy the listed columns from the
# matched row of excel_1 into the corresponding row of excel_2, then save.
# (presumably true_ans maps excel_2 row -> excel_1 row index — TODO confirm)
for ind, val in bar(enumerate(true_ans)):
    if not pd.isna(val):
        excel_2.loc[ind, colums] = excel_1.iloc[int(val)][colums]
excel_2.to_excel("5.xlsx")
# Evaluate fitness of the initial population.
old_pop = init_fitness(
    old_pop,
    n_pop,
    bounds,
    diameter,
    height,
    z_0,
    windspeed_array,
    theta_array,
    wind_prob,
)
# Wall-clock start of the genetic-algorithm loop.
a = time()
# NOTE(review): the matching except/finally clause for this try lies beyond
# the visible range of this chunk.
try:
    for gen in bar(range(generations)):
        # One GA generation: carry elites, crossover, mutate, re-score.
        elite(new_pop, old_pop, elit_num)
        cross(new_pop, old_pop, n_pop, n_var, elit_num, cross_num, tour_size)
        mutate(new_pop, old_pop, n_pop, mutat_num, n_var, mutat_genes, b_range)
        new_pop = fitness(
            new_pop,
            n_pop,
            elit_num,
            bounds,
            diameter,
            height,
            z_0,
            windspeed_array,
            theta_array,
            wind_prob,
        )
def Frequency_Analysis(username, prname):
    """Count token frequencies in a text dataframe and merge near-duplicate tags.

    Tokens (length > 1, whitespace-split, lowercased) occurring at least 50
    times are kept; adjacent tags where one is a prefix of the next and their
    counts differ by less than the configured Frequency_Gap ratio are merged.
    Returns the resulting tag/count dataframe (also exported to disk unless
    running in co-occurrence mode).
    """
    import pandas as pd
    from collections import Counter
    from tqdm import tqdm as bar
    from .utils import Read_Arg_, import_dataframe, export_dataframe
    input_directory = "/".join([username, prname])
    # Non-Changmin version
    if prname is not None:
        # prname given -> called from another function: dictionary mode.
        for_cooc = 0
        # Case where a pure Frequency_Analysis should be performed ->
        ref, input_, output_ = Read_Arg_("Frequency_Analysis")
        Frequency_Gap = int(ref) / 100
        text = import_dataframe(input_)
    else:
        # Write the textfile to analyze into username.
        for_cooc = 1
        ref, _, _ = Read_Arg_("Frequency_Analysis", isind=1)
        Frequency_Gap = int(ref) / 100
        text = import_dataframe(username)

    # Collects non-empty, lowercased, stripped cell contents (side effect on
    # the enclosing `contents` list via DataFrame.apply).
    def get_contents(item):
        if item != "":
            # not_language = re.compile('[^ ㄱ-ㅎㅣ가-힣|a-z|A-Z]+')
            # item = re.sub(not_language,"",str(item))
            contents.append(str(item).lower().strip())

    contents = []
    tag_contents = []
    text.contents.apply(get_contents)
    for token in contents:
        # This part should be changed to split on something other than spaces.
        for word in str(token).split(" "):
            if len(str(word)) > 1:
                tag_contents.append(word)
    counted_contents = Counter(tag_contents)
    tag_count = []
    for n, c in counted_contents.most_common():
        dics = {"tag": n, "count": c}
        tag_count.append(dics)
    df_tag_count = pd.DataFrame(tag_count)
    # Keep only tags seen >= 50 times, sorted alphabetically so that
    # prefix-related tags end up adjacent.
    df_tag_count = df_tag_count[df_tag_count["count"] >= 50].sort_values(
        by="tag").reset_index(drop=True)
    iterations = len(df_tag_count)
    row_num = 0
    total = bar(range(iterations - 1), desc="comparing...")
    # Compare each row with its successor; when the current tag is a prefix of
    # the next one and the counts are close, merge them (the dataframe shrinks,
    # so row_num only advances on non-merges).
    for t in total:
        step = t + 1
        std_row = df_tag_count.iloc[row_num]
        comparison_row = df_tag_count.shift(-1).iloc[row_num]
        std_tag = str(std_row["tag"])
        std_count = std_row["count"]
        comparison_tag = str(comparison_row["tag"])
        comparison_count = comparison_row["count"]
        if std_tag == comparison_tag[:len(std_tag)]:
            frequency_gap = abs(std_count - comparison_count)
            if frequency_gap / std_count < Frequency_Gap:
                # Merge: fold this row's count into the next row, drop this tag.
                df_tag_count.iloc[row_num + 1, 1] = comparison_count + std_count
                df_tag_count = df_tag_count[
                    df_tag_count["tag"] != std_tag].reset_index(drop=True)
            else:
                row_num = row_num + 1
                continue
        else:
            row_num = row_num + 1
            continue
        if step == iterations - 1:
            break
    df_tag_count = df_tag_count.sort_values(
        by="count", ascending=False).reset_index(drop=True)
    if for_cooc == 0:
        export_dataframe(df_tag_count, output_)
    else:
        pass
    return df_tag_count