def cleanTrainSet():
    """Load train_set.csv and apply date parsing followed by one-hot encoding.

    Returns the fully prepared training DataFrame.
    """
    from helper import load_file

    frame = load_file("train_set.csv")
    # Date columns first, then categorical one-hot encoding.
    frame = parseDatasetDate(frame)
    return OHEDataset(frame)
def cleanBill2():
    """Build a PCA-compressed bill-of-materials feature table.

    Joins the cleaned component features onto each of the 8 component slots
    of bill_of_materials.csv, then reduces the resulting wide matrix to 20
    principal components.

    Returns:
        DataFrame indexed by tube_assembly_id with columns pca_0 .. pca_19.
    """
    from helper import load_file
    from pandas import merge
    from CleanComponents import cleanComponents
    from sklearn.decomposition import PCA
    from pandas import DataFrame

    billOfComponents = load_file("bill_of_materials.csv")
    billOfComponents['tube_assembly_id'] = billOfComponents.index

    components = cleanComponents()
    components['component_id'] = components.index
    # Base column names, re-suffixed per slot inside the loop below.
    names = components.columns.values

    for i in range(1, 9):
        slot_col = "component_id" + "_" + str(i)
        # Suffix the component feature columns so each of the 8 slots
        # contributes a distinct set of columns after the merge.
        components.columns = names + "_" + str(i)
        billOfComponents = merge(billOfComponents, components, how='left', on=slot_col)
        # drop(columns=...) -- the positional axis argument was removed in pandas 2.0
        billOfComponents = billOfComponents.drop(columns=slot_col)

    billOfComponents.index = billOfComponents['tube_assembly_id']
    billOfComponents = billOfComponents.drop(columns="tube_assembly_id")
    billOfComponents = billOfComponents.fillna(0)

    # Compress the sparse merged matrix down to 20 principal components.
    pca = PCA(n_components=20).fit_transform(billOfComponents)
    billOfComponents = DataFrame(pca, index=billOfComponents.index)
    billOfComponents.columns = ["pca_" + str(i) for i in billOfComponents.columns.values]
    return billOfComponents
def loadComponentBase():
    """Return the component table joined with its type names.

    Result is indexed by component_id; missing names are filled with
    the placeholder string 'Unnamed'.
    """
    from helper import load_file
    from pandas import merge

    comp = load_file("components.csv")
    comp['component_id'] = comp.index
    comp.columns = ['component_name', 'component_type_id', 'component_id']

    ctype = load_file("type_component.csv")
    ctype['component_type_id'] = ctype.index
    ctype.columns = ['component_type_name', 'component_type_id']

    joined = merge(comp, ctype, how='left',
                   left_on="component_type_id", right_on="component_type_id")
    joined.index = joined.component_id
    return joined.fillna('Unnamed')
def cleanTestSet():
    """Load test_set.csv, re-index by tube_assembly_id, parse dates, one-hot encode.

    The original positional row index is preserved in an 'id' column.
    Returns the prepared test DataFrame.
    """
    from helper import load_file

    data = load_file("test_set.csv")
    data['id'] = data.index
    data.index = data['tube_assembly_id']
    # drop(columns=...) -- the positional axis argument was removed in pandas 2.0
    data = data.drop(columns='tube_assembly_id')
    data = parseDatasetDate(data)
    data = OHEDataset(data)
    return data
def loadTubes():
    """Load tube.csv and attach the forming flag for both tube ends.

    tube_end_form.csv is merged twice (once per end, end_a and end_x),
    producing forming_a / forming_x columns.  Returns a DataFrame indexed
    by tube_assembly_id.
    """
    from helper import load_file
    from pandas import merge

    tube = load_file("tube.csv")
    tube['tube_assembly_id'] = tube.index

    tube_end = load_file("tube_end_form.csv")
    tube_end['end_id'] = tube_end.index

    # First merge: forming flag for end A.
    tube_end.columns = ['forming_a', 'end_id']
    tube = merge(tube, tube_end, how='left', left_on="end_a", right_on="end_id")
    # Second merge: same table relabelled for end X; pandas suffixes the
    # duplicated end_id columns as end_id_x / end_id_y.
    tube_end.columns = ['forming_x', 'end_id']
    tube = merge(tube, tube_end, how='left', left_on="end_x", right_on="end_id")

    tube.index = tube.tube_assembly_id
    # drop(columns=...) -- the positional axis argument was removed in pandas 2.0
    tube = tube.drop(columns=["tube_assembly_id", "end_id_y", "end_id_x"])
    return tube
def loadComponentSpecifics(components):
    """Merge every comp_*.csv detail table into the base component frame.

    Args:
        components: base component DataFrame (must expose component_id).

    Returns:
        The enriched DataFrame, indexed by component_id.
    """
    from helper import load_file

    files = ["comp_adaptor.csv", "comp_boss.csv", "comp_elbow.csv", "comp_float.csv",
             "comp_hfl.csv", "comp_nut.csv", "comp_other.csv", "comp_sleeve.csv",
             "comp_straight.csv", "comp_tee.csv", "comp_threaded.csv"]
    # 'filename' instead of 'file' -- avoid shadowing the builtin name.
    for filename in files:
        data = load_file(filename)
        data['component_id'] = data.index
        data['from_file'] = filename
        if 'component_type_id' in data.columns:
            # Already present on the base frame; drop to avoid merge suffixes.
            # drop(columns=...) -- positional axis argument removed in pandas 2.0.
            data = data.drop(columns='component_type_id')
        data = data.fillna(0)
        components = specificMerge(data, components, "component_id")

    components.index = components.component_id
    components = components.drop(columns="component_id")
    return components
def processBill():
    """Reshape bill_of_materials.csv from wide (8 component slots per tube)
    to long format: one row per (tube_assembly_id, slot).

    Returns:
        DataFrame indexed by tube_assembly_id with columns quantity,
        component_id and component_number (1 if the slot is used, else 0).
    """
    from helper import load_file
    from pandas import concat

    billOfComponents = load_file("bill_of_materials.csv")
    billOfComponents['tube_assembly_id'] = billOfComponents.index

    # Slice out each (quantity_i, component_id_i) slot pair and stack them.
    frames = [billOfComponents[['tube_assembly_id',
                                'quantity_' + str(i + 1),
                                'component_id_' + str(i + 1)]]
              for i in range(8)]
    for df in frames:
        df.columns = ['tube_assembly_id', 'quantity', 'component_id']
    billOfComponents = concat(frames)

    billOfComponents.quantity = billOfComponents.quantity.fillna(0)
    billOfComponents.component_id = billOfComponents.component_id.fillna("None")
    # Flag whether the slot actually holds a component.  (The original
    # assigned this twice; the duplicate line has been removed.)
    billOfComponents['component_number'] = (billOfComponents.quantity != 0).astype(int)

    billOfComponents.index = billOfComponents.tube_assembly_id
    # drop(columns=...) -- the positional axis argument was removed in pandas 2.0
    billOfComponents = billOfComponents.drop(columns='tube_assembly_id')
    return billOfComponents
max_source_len = max(source_lens) max_target_len = max(target_lens) for p in ps: source_seq = [w2i_source[w] for w in doc_source[p].split()] + [w2i_source["<PAD>"]] * ( max_source_len - len(doc_source[p].split())) target_seq = [w2i_target[w] for w in doc_target[p].split()] + [w2i_target["<PAD>"]] * ( max_target_len - 1 - len(doc_target[p].split())) + [w2i_target["<EOS>"]] source_batch.append(source_seq) target_batch.append(target_seq) return source_batch, source_lens, target_batch, target_lens if __name__ == '__main__': print 'loading data ...' doc_source = helper.load_file('./data/small_vocab_en.txt') doc_target = helper.load_file('./data/small_vocab_fr.txt') s_token2idx, s_idx2token = helper.load_vocab('./data/small_vocab_en.txt', helper.SOURCE_CODES) t_token2idx, t_idx2token = helper.load_vocab('./data/small_vocab_fr.txt', helper.TARGET_CODES) print 'building model...' config = config() config.source_vocab_size = len(s_token2idx) config.target_vocab_size = len(t_token2idx) model = Seq2seq(config, t_token2idx, useTeacherForcing=True) batches = 10000 print_every = 100 print 'run model...' with tf.Session() as sess: saver = tf.train.Saver() sess.run(tf.global_variables_initializer()) losses = []
def loadSpecs():
    """Return the raw specs table loaded from specs.csv."""
    from helper import load_file
    return load_file("specs.csv")
helper.step_window = args.win ### ---------- Import Relevand Libraries ---------- import numpy as np from keras.optimizers import RMSprop import sys import random import re from helper import load_file, create_model, sequence_length, add_temperature ### ---------- Load Text File and Build Vocabulary ---------- # note that the data length for training (train.py) and predictions (predict.py) must be the same all_words, unique_words = load_file(args.data) total_num_words = len(all_words) len_vocab = len(unique_words) print('\n----------------------------') print("> Total number of words:\t" + str(total_num_words)) print("> Length of vocabulary:\t\t" + str(len_vocab)) print('----------------------------') word_to_int = dict((c, i) for i, c in enumerate(unique_words)) int_to_word = dict((i, c) for i, c in enumerate(unique_words)) ### ---------- Define Model ---------- num_layers = 1 drop_out_rate = 0.2
def _strip_conflict_markers(file_path, keep_ours):
    """Rewrite a conflicted file, keeping exactly one side of each conflict block.

    Args:
        file_path: absolute path of the file containing git conflict markers.
        keep_ours: True keeps the HEAD (<<<<<<< .. =======) side,
                   False keeps the incoming (======= .. >>>>>>>) side.
    Lines outside any conflict block are always kept.
    """
    new_content = ''
    # 'with' closes the handle -- the original leaked the open file object.
    with open(file_path) as file_obj:
        all_lines = file_obj.readlines()
    in_head_block = False
    in_others_block = False
    for line in all_lines:
        if line.startswith('<<<<<<<'):
            in_head_block = True
            continue
        elif line.startswith('======='):
            in_head_block = False
            in_others_block = True
            continue
        elif line.startswith('>>>>>>>'):
            in_others_block = False
            continue
        if in_head_block and not in_others_block:
            if keep_ours:
                new_content += line
        elif not in_head_block and in_others_block:
            if not keep_ours:
                new_content += line
        else:
            new_content += line
    write_file(file_path, new_content)


def merge(path, branch):
    """Merge `branch` into the git repository at `path`, commit and push.

    Behaviour is driven by an optional .gitcli.yml in the repo root:
      merge_ignores                     -- files restored to HEAD after merge
      conflict_resolve_by_self_files    -- conflicts resolved keeping our side
      conflict_resolve_by_others_files  -- conflicts resolved keeping their side
    Exits the process (sys.exit(1)) on unresolvable conflicts or git errors.
    """
    git_config(path)
    click.secho('合并分支')
    click.secho('工作目录: %s' % (path))
    if branch is None:
        click.secho('缺少参数branch', fg='red')
        sys.exit(1)
    click.secho('分支名:%s' % (branch))

    # Read the .gitcli.yml configuration file.
    yml_path = os.path.join(path, '.gitcli.yml')
    merge_ignores = []
    conflict_resolve_by_self_files = []
    conflict_resolve_by_others_files = []
    content = load_file(yml_path)
    if content is not None:
        temp = yaml.load(content, Loader=yaml.FullLoader)
        # dict.has_key() was removed in Python 3 -- use the `in` operator.
        # Also guard against an empty YAML file, which loads as None.
        if temp:
            if 'merge_ignores' in temp:
                merge_ignores.extend(temp['merge_ignores'])
            if 'conflict_resolve_by_self_files' in temp:
                conflict_resolve_by_self_files.extend(
                    temp['conflict_resolve_by_self_files'])
            if 'conflict_resolve_by_others_files' in temp:
                conflict_resolve_by_others_files.extend(
                    temp['conflict_resolve_by_others_files'])
    click.secho('.gitcli.yml中配置的合并忽略文件:%s' % (merge_ignores))
    click.secho('.gitcli.yml中配置的冲突使用自己解决的文件:%s' %
                (conflict_resolve_by_self_files))
    click.secho('.gitcli.yml中配置的冲突使用对方解决的文件:%s' %
                (conflict_resolve_by_others_files))

    errCode, stdMsg, errMsg = run_command(
        'git merge %s --no-commit --no-ff' % (branch), path)
    if errCode == 0 and stdMsg == 'Already up to date.\n':
        click.secho('不需要合并', fg='green')
        return

    # Restore ignored files to their pre-merge (HEAD) state.
    for merge_ignore in merge_ignores:
        errCode, stdMsg, errMsg = run_command(
            'git checkout HEAD -- %s && git reset HEAD %s'
            % (merge_ignore, merge_ignore), path)
        if errCode == 0:
            click.secho('合并忽略文件:%s' % (merge_ignore))
        else:
            click.secho('合并忽略文件:%s %s' % (merge_ignore, errMsg))
    errCode, stdMsg, errMsg = run_command('git clean -df', path)
    if errCode == 0:
        click.secho('清理不在版本库文件成功', fg='green')
    else:
        click.secho('清理不在版本库文件失败', fg='red')

    # List conflicted files (diff-filter=U = unmerged).
    errCode, stdMsg, errMsg = run_command(
        'GIT_PAGER=' ' git diff --name-only --diff-filter=U', path)
    conflict_files = stdMsg.split('\n')
    while '' in conflict_files:
        conflict_files.remove('')

    cannot_fix_conflict_files = []
    is_resolve_conflict = False
    if len(conflict_files) != 0:
        # Resolve each conflict per configuration; unknown files are fatal.
        click.secho('冲突文件列表:\n%s' % (stdMsg))
        for conflict_file in conflict_files:
            conflict_file_path = os.path.join(path, conflict_file)
            if conflict_file in conflict_resolve_by_self_files:
                _strip_conflict_markers(conflict_file_path, keep_ours=True)
                is_resolve_conflict = True
                click.secho('使用自己解决冲突成功:%s' % (conflict_file), fg='green')
            elif conflict_file in conflict_resolve_by_others_files:
                _strip_conflict_markers(conflict_file_path, keep_ours=False)
                is_resolve_conflict = True
                click.secho('使用对方解决冲突成功:%s' % (conflict_file), fg='green')
            else:
                cannot_fix_conflict_files.append(conflict_file)
    if len(cannot_fix_conflict_files) > 0:
        click.secho('不能解决冲突文件列表:%s' % (cannot_fix_conflict_files), fg='red')
        sys.exit(1)

    all_modify_files = []
    # Stage unstaged modifications.
    errCode, stdMsg, errMsg = run_command('GIT_PAGER=' ' git diff --name-only', path)
    unstaged_modify_files = stdMsg.split('\n')
    while '' in unstaged_modify_files:
        unstaged_modify_files.remove('')
    for modify_file in unstaged_modify_files:
        errCode, stdMsg, errMsg = run_command('git add \'%s\'' % (modify_file), path)
        if errCode != 0:
            click.secho('添加文件到缓存区失败:%s' % (errMsg), fg='red')
            sys.exit(1)
    all_modify_files.extend(unstaged_modify_files)

    # Collect files already staged before we ran.
    errCode, stdMsg, errMsg = run_command(
        'GIT_PAGER=' ' git diff --cached --name-only', path)
    staged_modify_files = stdMsg.split('\n')
    while '' in staged_modify_files:
        staged_modify_files.remove('')
    all_modify_files.extend(staged_modify_files)

    # Report "nothing modified" (conflict-only merges are excluded); the
    # original intentionally falls through to commit/push regardless.
    if len(all_modify_files) == 0 and not is_resolve_conflict:
        click.secho('没有文件修改', fg='green')

    errCode, stdMsg, errMsg = run_command(
        'git commit -m \'gitcli: merge from %s\'' % (branch), path)
    if errCode == 0:
        click.secho('commit成功', fg='green')
    else:
        click.secho('commit失败 %s' % (errMsg), fg='red')

    errCode, stdMsg, errMsg = run_command('git push', path)
    if errCode == 0:
        click.secho('合并提交成功', fg='green')
    else:
        click.secho('合并提交失败:%s' % (errMsg), fg='red')
        sys.exit(1)
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
from helper import load_file
import base64
import pprint


def AES(msg, key):
    """Encrypt msg under key with AES in ECB mode.

    NOTE: ECB is insecure for real use; this is cryptopals challenge code.
    """
    ecb_cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    enc = ecb_cipher.encryptor()
    return enc.update(msg) + enc.finalize()


def AES_decryptor(ciphertext, key):
    """Decrypt an AES-ECB ciphertext with the given key."""
    ecb_cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    dec = ecb_cipher.decryptor()
    return dec.update(ciphertext) + dec.finalize()


if __name__ == "__main__":
    key = b"YELLOW SUBMARINE"
    # ch7.txt holds base64 lines; join then decode to raw bytes.
    ciphertext = base64.b64decode("".join(load_file("ch7.txt")))
    msg = AES_decryptor(ciphertext, key).decode()
    print(msg.rstrip())
repDict[substring] += 1 else: repDict[substring] = 0 return sum(repDict.values()) def ecb_detector(cipherText, block): rep_check = reps(cipherText, block) if rep_check > 0: return True else: return False if __name__ == "__main__": block = 16 cipherText = load_file("ch8.txt") for cp in cipherText: ecb = ecb_detector(cp, block) if ecb: ecb_encoded = cp break print("ECB ENCODED MESSAGE:\n", ecb_encoded, "\n")
""" Challenge4: Detect single-character XOR Given a file of hex encoded string that has been xor'd agains a single character. The goal is to find and decrypt the message. """ from challenge3 import breakSingleXOR from helper import load_file # decode the lines in the file fileList = load_file("ch4.txt") fileList = list(bytes.fromhex(hexString) for hexString in fileList) # first for each line return the broken XOR def detectXOR(cipherList): brokenList = [] for i in range(len(fileList)): temp = breakSingleXOR(fileList[i]) brokenList.append(temp) key = lambda x: x[-1] return sorted(brokenList, key=key, reverse=True)[0] if __name__ == "__main__":