def runfiles(env, checktestonly=False, includemanual=False):
    stats = {}
    for file in utils.findall('*rest-test.xml', './cases/'):
        stat = runfile(file, env, checktestonly=checktestonly, includemanual=includemanual)
        stats[file] = stat
    return stats
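# Hedged sketch (not part of the original sources): runfiles above relies on a
# utils.findall(pattern, directory) helper that is not shown here. It is assumed
# to behave like a recursive glob, returning the paths under `directory` whose
# file names match the wildcard pattern. A minimal stand-in under that assumption:
import fnmatch
import os

def findall_sketch(pattern, directory):
    """Recursively collect files under `directory` whose names match `pattern`."""
    matches = []
    for root, _dirs, files in os.walk(directory):
        for name in fnmatch.filter(files, pattern):
            matches.append(os.path.join(root, name))
    return matches

# Example (hypothetical): findall_sketch('*rest-test.xml', './cases/') would list
# every *rest-test.xml file below ./cases/, which is what runfiles iterates over.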
def locate_test_file(testfile):
    if os.path.splitext(testfile)[1] in ('.xlsx', '.xls'):
        cmd = "python exceltoxml.py -i " + testfile
        os.system(cmd)
        testfile = os.path.splitext(testfile)[0] + '.xml'
    files = utils.findall(testfile, '.')
    if len(files) == 0:
        logger.error('Test file [%s] not found in the current directory [.] or its subdirectories. Aborting the test!' % (testfile))
        return
    elif len(files) > 1:
        logger.error('Multiple test files [%s] found in the current directory [.] and its subdirectories. Aborting the test!' % (','.join(files)))
        return
    testfile = files[0]
    logger.info('Test file path: %s' % testfile)
    return testfile
def _find_packages_urls(architecture):
    url = "https://archive.archlinux.org/packages/g/glibc/"
    try:
        packages_filenames = utils.findall(
            fr"['\"](?P<filename>glibc-(?:.*?)-{architecture}\.pkg\.tar\.[gx]z)['\"]",
            url,
        )
    except AttributeError:
        print(utils.make_warning(f"Problems: {utils.make_bright(url)}"))
        return []
    else:
        packages_urls = [
            os.path.join(url, package_filename)
            for package_filename in packages_filenames
        ]
        return packages_urls
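# Hedged sketch (assumption, not the project's actual utils module): the two
# _find_packages_urls variants treat utils.findall(pattern, url) as "download the
# page at `url` and run a regular-expression findall over its body", unlike the
# filesystem-glob findall used by runfiles and refresh; utils.search presumably
# wraps re.search over the fetched body in the same way. A minimal version under
# that assumption, using only the standard library:
import re
import urllib.request

def findall_over_url_sketch(pattern, url):
    """Fetch `url` and return all regex matches of `pattern` in the response body."""
    with urllib.request.urlopen(url) as response:
        body = response.read().decode('utf-8', errors='replace')
    return re.findall(pattern, body)

# Example (hypothetical): findall_over_url_sketch(r'glibc-(?:.*?)-x86_64\.pkg\.tar\.[gx]z',
# "https://archive.archlinux.org/packages/g/glibc/") would return the matching
# package file names that _find_packages_urls joins onto the archive URL.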
def tfidf_and_wordcounts(protein_all, PID, word_length=2, stride=1):
    """
    Build TF-IDF and word-count features for protein sequences.
    :param protein_all: all protein sequences, pd.DataFrame
    :param PID: IDs of all proteins
    :param word_length: word length
    :param stride: stride
    :return: TF-IDF features and word-count features (pd.DataFrame instances)
    """
    # Split each protein sequence into "words" of length word_length with the given stride
    texts_protein = list(protein_all["Sequence"].apply(
        lambda x: findall(x.upper(), word_length, stride)))
    # Join the "words" of each protein sequence with spaces to build the corpus
    corpus = list(map(lambda x: " ".join(i for i in x), texts_protein))
    # Compute the term frequency-inverse document frequency of each "word" over the protein sequences
    tfidf = TfidfVectorizer(token_pattern=u'(?u)\\b\\w+\\b')
    tfidf_vals = tfidf.fit_transform(corpus)
    tfidf_vals = tfidf_vals.toarray()
    # Count how many times each "word" occurs in each protein sequence
    counts = CountVectorizer(token_pattern=u'(?u)\\b\\w+\\b')
    word_counts = counts.fit_transform(corpus)
    word_counts = word_counts.toarray()
    del corpus
    tfidf_vals = pd.DataFrame(tfidf_vals, columns=[
        str(word_length) + "_ags_tfidfs_" + str(i)
        for i in range(tfidf_vals.shape[1])
    ])
    word_counts = pd.DataFrame(word_counts, columns=[
        str(word_length) + "_ags_wordcounts_" + str(i)
        for i in range(word_counts.shape[1])
    ])
    tfidf_vals["Protein_ID"] = PID
    word_counts["Protein_ID"] = PID
    return tfidf_vals, word_counts
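# Hedged usage sketch (assumption): tfidf_and_wordcounts depends on a custom
# findall(sequence, word_length, stride) helper that is not shown here; it is
# assumed to cut the sequence into overlapping "words". A toy stand-in and a
# hypothetical call:
import pandas as pd

def findall_words_sketch(sequence, word_length, stride):
    """Slide a window of `word_length` over `sequence` with the given stride."""
    return [sequence[i:i + word_length]
            for i in range(0, len(sequence) - word_length + 1, stride)]

# Toy input: two short protein sequences with their IDs (hypothetical data).
proteins = pd.DataFrame({"Protein_ID": [1, 2], "Sequence": ["MKVLAA", "MKKLGT"]})
# With findall bound to the sketch above, the call would look like:
# tfidf_df, counts_df = tfidf_and_wordcounts(proteins, proteins["Protein_ID"], word_length=2, stride=1)
# yielding one row per protein: 2_ags_tfidfs_* / 2_ags_wordcounts_* columns plus Protein_ID.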
def _find_packages_urls(release, architecture, package):
    url = f"https://launchpad.net/ubuntu/{release}/{architecture}/{package}"
    packages_versions = set(
        utils.findall(
            fr'"/ubuntu/.+?/{package}/(?P<version>.+?)(?:\.\d+)?"', url))
    if not packages_versions:
        print(utils.make_warning(f"Problems: {utils.make_bright(url)}"))
        return []
    n = 3
    most_recent_packages_versions = sorted(packages_versions, reverse=True)[:n]
    packages_urls = [
        utils.search(
            r"['\"](?P<url>https?.*?libc6.*?.deb)['\"]",
            f"https://launchpad.net/ubuntu/{release}/{architecture}/{package}/{package_filename}",
        ).group("url")
        for package_filename in most_recent_packages_versions
    ]
    if not packages_urls:
        print(utils.make_warning(f"Problems: {utils.make_bright(url)}"))
        return []
    return packages_urls
def protein_embedding(protein_all, word_length=3, stride=1):
    """
    Build word-embedding features for protein sequences.
    :param protein_all: all protein sequences, pd.DataFrame
    :param word_length: word length, int
    :param stride: stride, int
    :return: protein word-embedding features (pd.DataFrame instance)
    """
    # Split each protein sequence into "words" of length word_length with the given stride
    texts_protein = list(protein_all["Sequence"].apply(
        lambda x: findall(x.upper(), word_length, stride)))
    n = 128
    # Train a skip-gram Word2Vec model over the protein "words"
    model_protein = Word2Vec(texts_protein, size=n, window=4, min_count=1,
                             negative=3, sg=1, sample=0.001, hs=1, workers=4)
    # One row per vocabulary word: its embedding plus the word itself
    vectors = pd.DataFrame(
        [model_protein[word] for word in (model_protein.wv.vocab)])
    vectors['Word'] = list(model_protein.wv.vocab)
    vectors.columns = ["vec_{0}".format(i) for i in range(0, n)] + ["Word"]
    # Build a long table of (protein ID, word) pairs for every word occurrence
    wide_vec = pd.DataFrame()
    result1 = []
    aa = list(protein_all['Protein_ID'])
    for i in range(len(texts_protein)):
        result2 = []
        for w in range(len(texts_protein[i])):
            result2.append(aa[i])
        result1.extend(result2)
    wide_vec['Id'] = result1
    result1 = []
    for i in range(len(texts_protein)):
        result2 = []
        for w in range(len(texts_protein[i])):
            result2.append(texts_protein[i][w])
        result1.extend(result2)
    wide_vec['Word'] = result1
    del result1
    # Attach each word's embedding, then average the embeddings per protein
    wide_vec = wide_vec.merge(vectors, on='Word', how='left')
    wide_vec = wide_vec.drop('Word', axis=1)
    wide_vec.columns = ['Protein_ID'] + ["vec_{0}".format(i) for i in range(0, n)]
    del vectors
    name = ["vec_{0}".format(i) for i in range(0, n)]
    feat = pd.DataFrame(
        wide_vec.groupby(['Protein_ID'])[name].agg('mean')).reset_index()
    del wide_vec
    feat.columns = ["Protein_ID"] + [
        str(word_length) + "_mean_ci_{0}".format(i) for i in range(0, n)
    ]
    return feat
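# Hedged note (assumption about library versions): protein_embedding is written
# against the pre-4.0 gensim API — Word2Vec(..., size=n), model[word] indexing and
# iteration over model.wv.vocab. On gensim >= 4.0 the equivalents are vector_size,
# model.wv[word] and model.wv.key_to_index. A usage sketch under the same
# assumptions as above (hypothetical toy data and findall helper):
#
# proteins = pd.DataFrame({"Protein_ID": [1, 2], "Sequence": ["MKVLAA", "MKKLGT"]})
# embed_feat = protein_embedding(proteins, word_length=3, stride=1)
# # embed_feat has one row per Protein_ID with columns 3_mean_ci_0 ... 3_mean_ci_127,
# # i.e. the mean word vector of each protein's 3-mer "words".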
def refresh(config, dircount):
    print('refresh start...')
    if config.protos_dir is None:
        config.protos_dir = 'protos'
    externalProtos = []
    if config.java_out is None:
        config.java_out = 'src/main/java'
    if config.python_out is None:
        config.python_out = 'src/main/resources'
    if config.python_out_test is None:
        #config.python_out_test='../../../../../test/rest'
        config.python_out_test = ('../' * dircount + 'test/rest')
    if config.external_protos is not None:
        externalProtos = config.external_protos.split(';')
    utils.run_cmd('IF NOT EXIST "%s" MKDIR "%s"' % (config.java_out, config.java_out))
    utils.run_cmd('IF NOT EXIST "%s" MKDIR "%s"' % (config.python_out, config.python_out))
    utils.run_cmd('IF NOT EXIST "%s" MKDIR "%s"' % (config.python_out_test, config.python_out_test))
    print('protos directory:%s' % config.protos_dir)
    print('java output directory:%s' % config.java_out)
    externalProtosImport = []
    externalProtosDir = []
    print('copy external protos')
    for proto in externalProtos:
        splitArray = proto.split(',')
        protoPath = splitArray[0]
        protoImport = splitArray[1]
        externalProtosImport.append(protoImport)
        protoDir = protoImport[0:protoImport.rindex('/')].replace('/', '\\')
        externalProtosDir.append(protoDir)
        utils.run_cmd('IF NOT EXIST "%s" MKDIR "%s"' % (protoDir, protoDir))
        utils.run_cmd('copy /Y %s %s' % (protoPath.replace('/', '\\'),
                                         protoImport.replace('/', '\\')))
    print('finish copying external protos')
    print('copy base.proto and protoc.exe...')
    script_dir = os.path.dirname(os.path.abspath(__file__))
    #baseProtoFilePath = os.path.join(script_dir, 'base.proto')
    #utils.run_cmd('copy /Y %s .' % baseProtoFilePath)
    toolFilePath2 = os.path.join(script_dir, 'protoc2.exe')
    utils.run_cmd('copy /Y %s .' % toolFilePath2)
    toolFilePath3 = os.path.join(script_dir, 'protoc3.exe')
    utils.run_cmd('copy /Y %s .' % toolFilePath3)
    print('finish copy')
    protoFiles = utils.findall('*.proto', config.protos_dir)
    for protoFile in protoFiles:
        if contain(externalProtosImport, protoFile) is False:
            relativeProto = '.%s' % protoFile.replace(
                os.path.abspath(os.curdir), '').replace('\\', '/')
            proto_cmd = 'protoc2.exe --java_out=%s %s' % (config.java_out, relativeProto)
            python_proto_cmd = 'protoc3.exe --python_out=%s %s' % (config.python_out, relativeProto)
            python_test_cmd = 'protoc3.exe --python_out=%s %s' % (config.python_out_test, relativeProto)
            print('execute %s' % proto_cmd)
            utils.run_cmd(proto_cmd)
            print('execute %s' % python_proto_cmd)
            utils.run_cmd(python_proto_cmd)
            print('execute %s' % python_test_cmd)
            utils.run_cmd(python_test_cmd)
    print('finish pb code generation')
    print('clean base.proto and protoc.exe...')
    #utils.run_cmd('del /Q base.proto')
    utils.run_cmd('del /Q protoc2.exe')
    utils.run_cmd('del /Q protoc3.exe')
    for file in externalProtosImport:
        utils.run_cmd('del /Q %s' % file.replace('/', '\\'))
    for dir in externalProtosDir:
        if len(utils.findall('*.proto', dir)) == 0:
            utils.run_cmd('rd /Q %s' % dir.replace('/', '\\'))
    print('finish cleaning')
    print('refresh finish')