def __init__(self, repo_name, previous, new, workdir):
    """Mirror-clone a GitHub repository and open it with pygit2.

    Args:
        repo_name: repository name appended to self.GITHUB_URL.
        previous: branch/ref name that must exist on the remote.
        new: second branch/ref name that must exist on the remote.
        workdir: directory under which the bare mirror is created.

    Raises:
        Error: a ref is missing on the remote, the remote check fails,
            or the mirror clone fails.
        RuntimeError: the cloned directory is not a git repository.
    """
    self.name = repo_name
    self.repodir = workdir + "/" + repo_name + ".git"
    self.previous = previous
    self.new = new
    self.url = self.GITHUB_URL + repo_name + ".git"
    # Check the remote for refs before attempting anything
    try:
        pc = sh.wc(sh.git("ls-remote", "--heads", self.GITHUB_URL + repo_name, previous), "-l").stdout.strip()
        nc = sh.wc(sh.git("ls-remote", "--heads", self.GITHUB_URL + repo_name, new), "-l").stdout.strip()
        # `git ls-remote --heads <repo> <ref>` prints nothing when the
        # ref does not exist, so a "0" line count means a missing ref.
        if pc == "0" or nc == "0":
            raise Error
    except sh.ErrorReturnCode:
        raise Error
    try:
        sh.git.clone("--mirror", self.url, self.repodir)
    except sh.ErrorReturnCode:
        sys.stderr.write("Failed to mirror repo url: %s\n" % self.url)
        raise Error
    try:
        repo_path = pygit2.discover_repository(self.repodir)
        self.repo = pygit2.Repository(repo_path)
    except KeyError:
        # BUG FIX: `name` was undefined here (the parameter is
        # `repo_name`), so this path raised NameError instead of
        # printing the intended message and raising RuntimeError.
        sys.stderr.write("%s is not a git repository\n" % repo_name)
        raise RuntimeError
def _runTest(self, shards, max_threads):
    """Exercise zmap sharding: for every (shards, threads) combination,
    each shard's output must contain no duplicate lines, and the union
    of all shards must contain exactly TestSharding.NUM_IPS unique IPs.
    """
    for threads in range(1, max_threads + 1):
        for shard in range(0, shards):
            # zmap needs elevated privileges to send raw packets.
            with sh.sudo:
                outfile = output_file_name(shards, shard, threads)
                zmap(p=80, T=threads, shards=shards, shard=shard, _out="tempfile")
                parse("tempfile", _out=outfile)
            # `uniq -d | wc -l` == 0 means no duplicates in this shard.
            dup_lines = int(wc(uniq(cat(outfile), "-d"), "-l"))
            self.assertEqual(dup_lines, 0)
            shard_file = shard_file_name(shards, threads)
            if shard == 0:
                cat(outfile, _out=shard_file)
            else:
                # Append this shard's output to the accumulated file.
                cat(shard_file, outfile, _out="tempfile")
                mv("tempfile", shard_file)
    for threads in range(1, max_threads + 1):
        shard_file = shard_file_name(shards, threads)
        # Combined shards must cover the whole address set exactly once.
        num_lines = int(wc(cat(shard_file), "-l"))
        self.assertEqual(num_lines, TestSharding.NUM_IPS)
        dup_lines = int(
            wc(uniq(sh.sort(cat(shard_file), "-n"), "-d"), "-l"))
        self.assertEqual(dup_lines, 0)
def test_piping(): from sh import sort, du, glob, wc, ls # sort this directory by biggest file print sort(du(glob('*'), '-sb'), '-rn') # print the number of folders and files in /etc print wc(ls('/etc', '-l'), '-l')
def test_incremental_composition(self):
    # `ls -A1 | wc -l` (composed incrementally via _piped) must agree
    # with os.listdir for the current directory.
    from sh import ls, wc
    c1 = int(wc(ls("-A1", _piped=True), l=True).strip())
    c2 = len(os.listdir("."))
    # Debug breadcrumb: log mismatches to a file before the assertion
    # fails, so flaky runs leave a trace.
    if c1 != c2:
        with open("/tmp/fail", "a") as h:
            h.write("F**K\n")
    self.assertEqual(c1, c2)
def analyse(self):
    """Append a report (exe size, ELF breakdown, source LOC, result
    accuracy vs reference) for this simulation to the module-level
    `res_file` stream."""
    if self.exe_name:
        # check compile size
        stat_res = sh.stat('-c', '%s', self.exe_name)
        print("* Sim exe ({}) file size (bytes) : {}".format(
            self.exe_name, stat_res), file=res_file, end="")
        # check exe inst size
        size_res = sh.size(self.exe_name)
        print("* ELF segment breakdown : \n{}".format(size_res),
              file=res_file, end="")
    else:
        print("* No exe file given\n", file=res_file, end="")
    # check loc
    if self.src_name:
        wc_res = sh.wc('-l', self.src_name)
        print("* Source LOC : {}".format(wc_res), file=res_file, end="")
    # check against ref results
    if self.res_name and self.res_ref and not self.unique_res:
        col = 1  # only compare V (starts at col 1)
        _, diffMax, diffEps = compare_files(self.res_path, self.res_ref,
                                            col, col)
        print(
            "* Max difference between results files {} and {} for col {} :\n"
            "\t{:+.16g} ({:+.16g} machine epsilons)".format(
                self.res_name, self.res_ref, col, diffMax, diffEps),
            file=res_file)
def _pod_check(self):
    """Return True iff the number of pods labelled with our commit hash
    in self.name_space equals the expected count self.num."""
    listing = sh.kubectl('get', 'pod',
                         '-l', 'commitHash=%s' % self.git_hash,
                         '-n', self.name_space)
    line_count = sh.wc(listing, '-l')
    return int(line_count.replace('\n', "")) == self.num
def _get_unprocessed_pages(self, export_archive_path, extract_to_path):
    """Extract UNPROCESSED_PAGES_DIR members from the export archive and
    return (total_docs, [(page_path, page_number, doc_count), ...]).

    Raises:
        CommandError: the archive has no unprocessed pages, a filename
            does not match the expected pattern, or nothing is left to
            process.
    """
    print('Extracting unprocessed pages')
    with zipfile.ZipFile(export_archive_path, 'r') as zipref:
        for member in zipref.namelist():
            if member.startswith(UNPROCESSED_PAGES_DIR):
                zipref.extract(member, extract_to_path)
    unprocessed_path = os.path.join(extract_to_path, UNPROCESSED_PAGES_DIR)
    if not os.path.exists(unprocessed_path):
        # Nothing was extracted: clean up and abort.
        shutil.rmtree(extract_to_path)
        raise CommandError('Export has no unprocessed pages.')
    unprocessed_pages = []
    total_docs = 0
    for page_filename in os.listdir(unprocessed_path):
        page_path = os.path.join(unprocessed_path, page_filename)
        # FIX: raw string (avoids the py3 invalid-escape warning for \d)
        # and escaped dots so '.' only matches a literal dot.
        page_search = re.search(r'page_(\d+)\.json\.gz', page_filename)
        if page_search:
            page_number = int(page_search.group(1))
        else:
            raise CommandError(
                'Unexpected page filename: {}'.format(page_filename))
        # `wc -l` prints "<count> <path>"; keep only the count field.
        doc_count = int(sh.wc('-l', page_path).split(' ')[0])
        total_docs += doc_count
        unprocessed_pages.append((page_path, page_number, doc_count))
    if not unprocessed_pages:
        raise CommandError('No pages left to process')
    return total_docs, unprocessed_pages
def _get_unprocessed_pages(self, export_archive_path, extract_to_path):
    """Extract UNPROCESSED_PAGES_DIR members from the export archive and
    return (total_docs, [(page_path, page_number, doc_count), ...]).

    Raises:
        CommandError: the archive has no unprocessed pages, a filename
            does not match the expected pattern, or nothing is left to
            process.
    """
    print('Extracting unprocessed pages')
    with zipfile.ZipFile(export_archive_path, 'r') as zipref:
        for member in zipref.namelist():
            if member.startswith(UNPROCESSED_PAGES_DIR):
                zipref.extract(member, extract_to_path)
    unprocessed_path = os.path.join(extract_to_path, UNPROCESSED_PAGES_DIR)
    if not os.path.exists(unprocessed_path):
        # Nothing was extracted: clean up and abort.
        shutil.rmtree(extract_to_path)
        raise CommandError('Export has no unprocessed pages.')
    unprocessed_pages = []
    total_docs = 0
    for page_filename in os.listdir(unprocessed_path):
        page_path = os.path.join(unprocessed_path, page_filename)
        # NOTE(review): the dots are unescaped, so they match any
        # character, not just a literal '.' — confirm this is intended.
        page_search = re.search(r'page_(\d+).json.gz', page_filename)
        if page_search:
            page_number = int(page_search.group(1))
        else:
            raise CommandError('Unexpected page filename: {}'.format(page_filename))
        # `wc -l` prints "<count> <path>"; keep only the count field.
        doc_count = int(sh.wc('-l', page_path).split(' ')[0])
        total_docs += doc_count
        unprocessed_pages.append((page_path, page_number, doc_count))
    if not unprocessed_pages:
        raise CommandError('No pages left to process')
    return total_docs, unprocessed_pages
def _check_deployment(self, _deploy_name):
    """Store the pod count for deployment `_deploy_name` on self.num and
    hard-exit the process (code 4) when fewer than two pods exist."""
    pods = sh.kubectl('get', 'pod', '-l', 'app=%s' % _deploy_name,
                      '-n', self.name_space)
    raw_count = sh.wc(pods, '-l')
    self.num = int(raw_count.replace('\n', ""))
    if self.num < 2:
        log_print("%s pod 不存在." % _deploy_name)
        os._exit(4)
def _init_counter(self, filenames):
    """Return the maximum `wc -l` line count across `filenames`.

    Best-effort: files that cannot be counted are skipped with a warning
    on stderr; returns 0 when nothing could be counted.
    """
    counter = 0
    for filename in filenames:
        try:
            counter = max(counter, int(sh.wc('-l', filename).split()[0]))
        except Exception:
            # BUG FIX: the original `print sys.stderr, "..."` printed the
            # stderr file object to *stdout* (missing `>>`); write the
            # warning to stderr explicitly. Also narrowed from a bare
            # `except:` so SystemExit/KeyboardInterrupt still propagate.
            sys.stderr.write("!! DiskBufferRunner._init_counter failed\n")
    return counter
def count(self): """We are going to default to the number of lines.""" # Third party modules # if os.name == "posix": import sh if os.name == "nt": import pbs3 # Count lines # if os.name == "posix": return int(sh.wc('-l', self.path).split()[0]) if os.name == "nt": return int(pbs3.Command("find")('/c', '/v', '""', self.path))
def test_command_wrapper(self):
    # Resolve absolute paths via `which`, wrap them as explicit sh
    # Command objects, and check `ls -A1 | wc -l` against os.listdir.
    from sh import Command, which
    ls_cmd = Command(which("ls"))
    wc_cmd = Command(which("wc"))
    piped_count = int(wc_cmd(ls_cmd("-A1"), l=True))
    expected = len(os.listdir("."))
    self.assertEqual(piped_count, expected)
def CheckInputFiles(Input):
    """Sanity-check that <Input>.geno, <Input>.snp and <Input>.ind agree:
    .geno and .snp must have equal line counts, and a .geno row's width
    must equal the .ind line count (one character per individual)."""
    ##Check geno and snp compatibility
    lineNo = ""
    # `wc -l file1 file2` emits one count line per file; grep for the
    # Input prefix keeps only those lines and the counts are compared.
    for line in sh.grep(sh.wc("-l", Input + ".geno", Input + ".snp"), Input):
        if lineNo == "":
            lineNo = line.strip().split()[0]
        elif lineNo == line.strip().split()[0]:
            break
        elif lineNo != line.strip().split()[0]:
            raise IOError("Input .snp and .geno files do not match.")
    ##Check geno and ind compatibility
    # NOTE(review): reconstructed from flattened source — the `else` is
    # read as belonging to the `if`, i.e. effectively only the first
    # .geno row is checked against the .ind line count; confirm against
    # the upstream original.
    with open(Input + ".geno", "r") as f:
        for line in f:
            if str(len(line.strip())) == sh.wc("-l", Input + ".ind").strip().split()[0]:
                break
            else:
                raise IOError("Input .ind and .geno files do not match.")
def __get_host_count(self, host_type=""):
    """ Get the current number of VMs running that match host_type string """
    # Guard clause: refuse to count without a host_type filter.
    if not host_type:
        sys.exit("Can't count non-existant host_type")
    # virsh list --all | grep <host_type> | awk '{print $2}' | wc -l
    matching = sh.grep(sh.virsh('list', '--all'), '%s' % host_type)
    names = sh.awk(matching, '{print $2}')
    return str(sh.wc(names, '-l')).rstrip()
def _runTest(self, shards, max_threads):
    """Exercise zmap sharding: for every (shards, threads) combination,
    each shard's output must contain no duplicate lines, and the union
    of all shards must contain exactly TestSharding.NUM_IPS unique IPs.
    """
    for threads in range(1, max_threads + 1):
        for shard in range(0, shards):
            # zmap needs elevated privileges to send raw packets.
            with sh.sudo:
                outfile = output_file_name(shards, shard, threads)
                zmap(p=80, T=threads, shards=shards, shard=shard, _out="tempfile")
                parse("tempfile", _out=outfile)
            # `uniq -d | wc -l` == 0 means no duplicates in this shard.
            dup_lines = int(wc(uniq(cat(outfile), "-d"), "-l"))
            self.assertEqual(dup_lines, 0)
            shard_file = shard_file_name(shards, threads)
            if shard == 0:
                cat(outfile, _out=shard_file)
            else:
                # Append this shard's output to the accumulated file.
                cat(shard_file, outfile, _out="tempfile")
                mv("tempfile", shard_file)
    for threads in range(1, max_threads + 1):
        shard_file = shard_file_name(shards, threads)
        # Combined shards must cover the whole address set exactly once.
        num_lines = int(wc(cat(shard_file), "-l"))
        self.assertEqual(num_lines, TestSharding.NUM_IPS)
        dup_lines = int(wc(uniq(sh.sort(cat(shard_file), "-n"), "-d"), "-l"))
        self.assertEqual(dup_lines, 0)
def get_available_gpus(yagi=None):
    """Returns the number of available gpus on a yagi

    Arguments:
        yagi: string, name of yagi

    Returns:
        num_gpus: int, number of gpus available on the yagi"""
    if yagi:
        # Remote host: run the whole pipeline over ssh.
        return int(ssh(yagi, 'lspci | grep VGA | grep NVIDIA | wc -l'))
    # Local host: lspci | grep VGA | grep NVIDIA | wc -l, via sh piping.
    nvidia_vga_lines = sh.grep(sh.grep(sh.lspci(), 'VGA'), 'NVIDIA')
    return int(sh.wc(nvidia_vga_lines, '-l'))
def monitor_fd():
    """Watch mongod's open file descriptors; at/above 4096, kill the
    local run.py and the crawler processes on every peer host.
    Errors are printed and swallowed so the monitor keeps running."""
    from sh import lsof, wc, ls, pkill, service
    try:
        # PID of whatever is listening on the MongoDB port.
        mongodb_pid = lsof('-t', '-i:27017', '-sTCP:LISTEN').split('\n')[0]
        # Number of fds held by that process: lsof -p <pid> | wc -l.
        num_fd = int(wc(lsof('-p', '{0}'.format(mongodb_pid)), '-l').strip())
        print 'num_fd:', num_fd
        if num_fd >= 4096:
            #service('mongodb', 'restart')
            pkill('-f', 'run.py')
            # Kill the listener on each peer's configured port (fabric).
            for p in chain(CRAWLER_PEERS, POWER_PEERS, TEXT_PEERS):
                with settings(host_string=p['host_string'], warn_only=True):
                    run('kill $(sudo lsof -t -i:{0})'.format(p['port']))
    except Exception:
        import traceback
        traceback.print_exc()
def get_wc(content_dir):
    """Return the total word count (`wc -w`) of all *.markdown files
    found under content_dir.

    Side effect: changes the working directory to content_dir (sh.cd).
    Files that cannot be read/counted are skipped.
    """
    cd(content_dir)
    files_list = find(".", "-name", "*.markdown")
    word_count = 0
    for f in files_list.split('\n'):
        # find's output ends with a blank line; skip empty entries.
        if not f:
            continue
        try:
            word_count += int(wc(cat(content_dir + f), "-w"))
        except Exception:
            # FIX: was a bare `except:` which also swallowed SystemExit/
            # KeyboardInterrupt; keep the best-effort skip for ordinary
            # errors only. (Also removed the unused `filetype` local.)
            pass
    return word_count
def mv_chaos_data(REPO_NAME, XRDF):
    """Compare the data/ subdirectories against the valid node IDs in
    XRDF and move each article directory without a matching RDF node
    into the archive directory CF.STUFF.
    (Translated from the original Chinese docstring.)
    """
    K2DESC = XRDF['k2desc']
    RDF_DESC = XRDF['doc']['RDF:RDF']['RDF:Description']
    _AIM_DRI = "%s/data"% REPO_NAME
    data_li = os.listdir(_AIM_DRI)
    print len(data_li), "\t<-- %s sub dirs"% _AIM_DRI
    # Valid directory IDs: the last 14 chars of each node's RDF:about.
    _K4DESC = []
    for i in K2DESC.keys():
        _K4DESC.append(K2DESC[i]['@RDF:about'][-14:])
    max_action = len(data_li)
    opt_pbar = {'end':max_action, 'width':64 , 'fill': '>' }
    pbar = progressbar.AnimatedProgressBar(**opt_pbar)
    count = 0
    for li in data_li:
        pbar+1
        pbar.show_progress()
        # Archive directories that have no matching RDF node.
        if li not in _K4DESC:
            count += 1
            _SRC = "%s/%s"% (_AIM_DRI, li)
            # copy-then-delete rather than mv
            cp("-rf", _SRC, CF.STUFF)
            rm("-rf", _SRC)
    print "\n\tmv %s dir into %s"% (count, CF.STUFF)
    print "means keep %s dir"% (len(data_li) - count)
    print ">>> ls -1 %s|wc -l"% _AIM_DRI
    print(wc(ls("-1", _AIM_DRI), "-l"))
    return None
def count(pkg):
    """Sum the line counts of git-tracked files under `pkg`, skipping
    results directories, missing paths, directories, symlinks and
    binary-ish extensions."""
    total = 0
    print(pkg)
    skip_exts = '.ipynb .xlsx .h5 .png .npy .pdf .nxs .gz'.split()
    for tracked in sh.git('ls-files', _cwd=pkg):
        rel = tracked.strip()
        path = os.path.join(pkg, rel)
        if '/results/' in path:
            continue
        if not os.path.exists(path):
            continue
        if os.path.isdir(path) or os.path.islink(path):
            continue
        _, ext = os.path.splitext(rel)
        if ext in skip_exts:
            continue
        # First field of `wc -l` output is the line count.
        total += int(sh.wc('-l', path).strip().split()[0])
    return total
#!/usr/bin/env python
# Count the lines of the words data file and print wc's output.
from sh import wc

line_count = wc('-l', '../DATA/words.txt')
print(line_count)
def count_lines(self):
    """Number of lines in self.path, per the first field of `wc -l`."""
    wc_output = sh.wc('-l', self.path)
    return int(wc_output.split()[0])
{fullcmd} from {BUILDDIR} (i.e., CMake's build stage). Scroll up for details or look at the build log via less -R {build_log} Exiting... """) stringio_obj4 = io.StringIO() num_estimated_warnings = 0 try: sh.wc(sh.grep("warning: ", build_log), "-l", _out=stringio_obj4) num_estimated_warnings=stringio_obj4.getvalue().strip() except sh.ErrorReturnCode_1: pass rich.print("") if running_config_and_generate: rich.print("CMake's config+generate+build stages all completed successfully") rich.print("") else: rich.print("CMake's build stage completed successfully") if "DBT_INSTALL_DIR" in os.environ and not re.search(r"^/?$", os.environ["DBT_INSTALL_DIR"]): for filename in os.listdir(os.environ["DBT_INSTALL_DIR"]): file_path = os.path.join(os.environ["DBT_INSTALL_DIR"], filename)
def cnt(cmd):
    """Run `cmd | wc -l` and return the line count as an int; 0 on any
    failure. Every exit code of `cmd` is tolerated via _ok_code."""
    try:
        # FIX: `range` instead of py2-only `xrange` (a list is fine for
        # _ok_code and this also runs on Python 3).
        return int(sh.wc(cmd(_ok_code=range(256)), '-l').stdout)
    except Exception:
        # FIX: narrowed from a bare `except:` so SystemExit and
        # KeyboardInterrupt propagate; the best-effort 0 is kept.
        return 0
def _count_commits(compare_spec):
    """Number of commits in `git log <compare_spec> --oneline` (wc -l)."""
    log_stream = git.log(compare_spec, '--oneline', _piped=True)
    return int(sh.wc(log_stream, '-l'))
print("INFO: using cached files in %s" % args.cache) srcdir = os.path.join(os.path.abspath(args.cache), default_subdirectory) if os.path.isdir(srcdir) == False: print("ERROR: cache directory '%s' is not valid" % srcdir) sys.exit(1) else: tmpdir = tempfile.mkdtemp(prefix="noto-") if args.verbose: print("INFO: using temporary directory %s" % tmpdir) os.chdir(tmpdir) if args.verbose: print("INFO: cloning git repo %s" % args.repo) sh.git.clone(args.repo, _err_to_out=True, _out=os.path.join(tmpdir, "git-checkout.stdout")) if args.verbose: print("INFO: downloaded %s files from git" % sh.wc(sh.find("noto-emoji"), "-l").strip()) srcdir = os.path.join(tmpdir, "noto-emoji", default_subdirectory) if os.path.isdir(args.output) == False: if args.verbose: print("INFO: creating directory '%s'" % args.output) os.makedirs(args.output) files = [] for file in os.listdir(srcdir): if file.endswith(".svg"): files.append(file) if args.verbose: sys.stdout.write("INFO: copying...")
def test_composition(self):
    # `ls -A1 | wc -l` via sh composition must match os.listdir's count.
    from sh import ls, wc
    piped_count = int(wc(ls("-A1"), l=True))
    expected = len(os.listdir("."))
    self.assertEqual(piped_count, expected)
def count(self):
    """We are going to default to the number of lines"""
    first_field = sh.wc('-l', self.path).split()[0]
    return int(first_field)
print("ERROR: cache directory '%s' is not valid" % srcdir) sys.exit(1) else: tmpdir = tempfile.mkdtemp(prefix="noto-") if args.verbose: print("INFO: using temporary directory %s" % tmpdir) os.chdir(tmpdir) if args.verbose: print("INFO: cloning git repo %s" % args.repo) sh.git.clone(args.repo, _err_to_out=True, _out=os.path.join(tmpdir, "git-checkout.stdout")) if args.verbose: print("INFO: downloaded %s files from git" % sh.wc(sh.find("noto-emoji"), "-l").strip()) srcdir = os.path.join(tmpdir, "noto-emoji", default_subdirectory) if os.path.isdir(args.output) == False: if args.verbose: print("INFO: creating directory '%s'" % args.output) os.makedirs(args.output) files = [] for file in os.listdir(srcdir): if file.endswith(".svg"): files.append(file) with open(args.normalmap) as nmfp: normal_map = json.load(nmfp)
def obscenities():
    """Download a public bad-word list and return it plus 'nsfw'.
    NOTE: runs at import time (used to build bad_re below)."""
    from urllib.request import urlopen
    resp = urlopen("http://cs.cmu.edu/~biglou/resources/bad-words.txt")
    # str(bytes) leaves b'...' artifacts at both ends; drop them.
    badwords = str(resp.read()).split("\\n")
    return badwords[1:-1] + ["nsfw"]

# Word-boundary alternation over all bad words, case-insensitive.
bad_re = re.compile("\\b(" + "|".join(obscenities()) + ")\\b", re.IGNORECASE)

def is_clean(line):
    res = bad_re.search(line)
    return res is None

# Total line count (via wc -l) for the tqdm progress total.
n = int(sh.wc("reddit_xmas_2017.json", l=True).split(" ")[0])
# Filter lines in parallel, then keep only the clean JSON records.
with concurrent.futures.ProcessPoolExecutor() as p:
    clean = p.map(is_clean, open("reddit_xmas_2017.json", "rt"), chunksize=10000)
cleaned = [
    json.loads(line)
    for line, clean in zip(
        tqdm.tqdm(open("reddit_xmas_2017.json", "rt"), total=n), clean)
    if clean
]
json.dump(cleaned, open("reddit_xmas_2017_clean.json", "wt"))
# This is an example configuration that can be modified or used as-is. The # commands are called through the sh module (amoffat.github.io/sh). # 'fortune' messages - only short ones though - fortune = sh.fortune('-s') # 'uname' output, including kernel name (-s), hostname (-n) and kernel # version (-r) uname = sh.uname('-snr').rstrip() # number of pacman updates available, counted through the 'checkupdates' # script included with pacman. Note: # a) This (obviously) only works in distros using pacman and # b) The checkupdates script is kinda slow (3-6 seconds in my experience), # so, if the script takes a long time to run, try disabling this part. repo_update_count = sh.wc(sh.checkupdates(), '-l').rstrip() # The path the ASCII image. image_path = '~/.motd_image' # The color in which the image will be printed. This can be any string, but # you will probably want to pass either one of the constants in # colorama.Fore, such as Fore.RED, or a color format string such as # '\033[92m' (for red). image_color = Fore.BLUE # The messages that will be printed next to the ASCII image. See the messages # parameter in the docstring of the get_motd function for details. messages = [ uname, f'{colored(repo_update_count, attrs=["bold"])} updates available' ]
def process_short(out_file, in_file, algorithm):
    """Forecast daily sales for each (store, barcode) row of `in_file`
    over 2019-10-01..2020-01-31 (123 days) with `algorithm`, writing one
    rounded forecast per day to `out_file`.

    Progress is shown with a ChargingBar when the input's non-blank line
    count could be determined (requires the `wc` and `sed` commands).
    """
    global CONFIG
    beg_date = arrow.get('2019-10-01', 'YYYY-MM-DD')
    end_date = arrow.get('2020-01-31', 'YYYY-MM-DD')
    write_stdout(f'Forecast from {beg_date} to {end_date}\n')  # 123 days
    lines_count = 0
    if wc and sed:
        write_stdout(f'Counting {in_file} non-blank lines... ')
        # sed deletes blank lines, wc -l counts the remainder.
        lines_count = int(wc(sed(r'/^\s*$/d', in_file), '-l'))
        print(lines_count)
    ops_count = lines_count * 123
    out_csv_file = open(out_file, 'w', newline='')
    in_csv_file = open(in_file, 'r', newline='\n')
    csv_writer = csv.writer(out_csv_file, delimiter=' ', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
    csv_reader = csv.reader(in_csv_file, delimiter=',')
    engine = create_engine(CONFIG['mysql'])
    print(f'Processing short output with {algorithm} algorithm...')
    bar = None
    if ops_count > 0:
        bar = ChargingBar('Waiting...', max=ops_count)
        bar.start()
    i = 0
    dfs = {}  # per-(store, barcode) daily-sales cache
    for row in csv_reader:
        if row is not None and len(row):
            store_id = int(row[0])
            barcode = int(row[1])
            key = f'{store_id}-{barcode}'
            try:
                df_barcode = dfs[key]
            except KeyError:
                df_barcode = get_barcode_daily_sales(engine, store_id, barcode)
                dfs[key] = df_barcode
            for j, d in enumerate(arrow.Arrow.range('day', beg_date, end_date)):
                forecast_from_date = d
                forecast_before_date = forecast_from_date.shift(days=5)
                forecast = do_forecast(algorithm, df_barcode,
                                       forecast_from_date, forecast_before_date)
                csv_writer.writerow([int(round(forecast))])
                if bar:
                    curr_op = i * 123 + j
                    if curr_op % 5 == 0:
                        bar.message = f'{curr_op} of {ops_count}'
                        bar.update()
                    bar.next()
            i += 1
    # BUG FIX: `bar` is None when the line count was unavailable
    # (ops_count == 0); the original dereferenced it unconditionally
    # here and crashed with AttributeError.
    if bar:
        bar.message = 'Done'
        bar.update()
    out_csv_file.close()
    in_csv_file.close()
#!/usr/bin/env python # -*- coding: utf-8 -*- from sh import git, ls, wc # checkout master branch git(checkout="master") # print(the contents of this directory print(ls("-l")) # get the longest line of this file longest_line = wc(__file__, "-L")
#!/usr/bin/env python from sh import wc wc_output = wc('-l', '/etc/passwd') pw_lines, junk = wc_output.split() print "{0} lines in /etc/passwd".format(pw_lines)
#coding=utf8
# Demonstrate sh piping: the inner command's output is fed to the outer
# command as stdin.
from sh import sort, du, glob, wc, ls

# sort this directory by biggest file
#print(sort(du(glob("*"), "-sb"), "-rn"))

# count the entries in the root directory listing: ls -l / | wc -l
print(wc(ls("-l", "/"), "-l"))
print("ERROR: cache directory '%s' is not valid" % srcdir) sys.exit(1) else: tmpdir = tempfile.mkdtemp(prefix="twemoji-") if args.verbose: print("INFO: using temporary directory %s" % tmpdir) os.chdir(tmpdir) if args.verbose: print("INFO: cloning git repo %s" % args.repo) sh.git.clone(args.repo, _err_to_out=True, _out=os.path.join(tmpdir, "git-checkout.stdout")) if args.verbose: print("INFO: downloaded %s files from git" % sh.wc(sh.find("twemoji"), "-l").strip()) srcdir = os.path.join(tmpdir, "twemoji", default_subdirectory) if os.path.isdir(args.output) == False: if args.verbose: print("INFO: creating directory '%s'" % args.output) os.makedirs(args.output) files = [] for file in os.listdir(srcdir): if file.endswith(".svg"): files.append(file) if args.verbose: sys.stdout.write("INFO: copying...")
def readfile(input_file):
    """Parse a pdf2txt dump of a nutrition PDF into six parallel lists
    (description, quantity, calories, proteins, glucids, lipids), store
    them if the lists are consistent, and archive the source PDF."""
    txtoutput = "out.tmp"
    sh.rm(txtoutput)
    res = sh.pdf2txt(input_file, _out=txtoutput)
    print "nb lines in pdf2txt", res
    res = wc("-l", txtoutput)
    print "nb lines in ", txtoutput, ":", res
    #print cat(txtoutput)
    lst_desc = []
    lst_qte = []
    lst_cal = []
    lst_prot = []
    lst_glu = []
    lst_lip = []
    mode = MODE_NOTHING
    # Property lines cycle cal -> prot -> glu -> lip; seed `previous`
    # with 'lip' so the first property line is taken as calories.
    previous = 'lip'
    for line in cat(txtoutput, _iter=True):
        line = line.lower().strip()
        mode, store_line = define_mode(line, mode)
        if store_line:
            if (mode == MODE_DESC):
                lst_desc.append(line)
            elif (mode == MODE_QTE):
                lst_qte.append(line)
            elif (mode == MODE_PROPERTIES):
                if previous == 'lip':
                    lst_cal.append(line)
                    previous = 'cal'
                elif previous == 'cal':
                    lst_prot.append(line)
                    previous = 'prot'
                elif previous == 'prot':
                    lst_glu.append(line)
                    previous = 'glu'
                elif previous == 'glu':
                    lst_lip.append(line)
                    previous = 'lip'
    d = len(lst_desc)
    q = len(lst_qte)
    c = len(lst_cal)
    p = len(lst_prot)
    g = len(lst_glu)
    l = len(lst_lip)
    print "desc:", d
    print "qte:", q
    print "cal:", c
    print "prot:", p
    print "glu:", g
    print "lip:", l
    #for l in lst_cal:
    #    print l
    # Consistency: all six lists non-empty and of identical length.
    data_check = True
    if not((d != 0) and (d == q) and (q == c) and (c == p)):
        data_check = False
    if not((p == g) and (g == l)):
        data_check = False
    if data_check:
        print "data check: OK"
        try:
            store_data(lst_desc, lst_qte, lst_cal, lst_prot, lst_glu, lst_lip)
        except:
            pass
        else:
            # move file if no errror
            input_filename = input_file.split('/')
            target_dir = "/home/guillaume/git/distark/data/ok/"
            target = os.path.join(target_dir, input_filename[-1])
            print "mv", input_file, target
            mv(input_file, target)
    else:
        print "data check: KO"
print('-' * 50)
# Expand /etc/pr* with sh's glob and long-list the matches.
from sh import ls, glob
print(ls('-ld', glob('/etc/pr*')))
print('-' * 50)
# Logged-in users.
w = sh.who()
print(w)
print('-' * 50)
# Disk usage, human-readable.
disk_usage = sh.df('-h')
print(disk_usage)
print('-' * 50)
from sh import uname
print(uname())
print(uname('-a'))
# Keyword form: uname(a=True) is equivalent to uname('-a').
print(uname(a=True))
print('-' * 50)
from sh import grep, wc
# grep 'sh' /etc/passwd | wc -l
print(grep('sh', '/etc/passwd'))
print(wc(grep('sh', '/etc/passwd'), l=True))
print('-' * 50)
from sh import tr
# The list is fed to tr's stdin via _in; tr upper-cases it.
fruits = 'apple banana mango orange'.split()
print(tr("[:lower:]", "[:upper:]", _in=fruits))
# Python 2 demo of the sh module.
from sh import git,ls,wc
# The bare string below is the script's original note; translated:
# "sh lets you call any program as though it were a function"
# (http://amoffat.github.com/sh/).
"""
http://amoffat.github.com/sh/
sh 可让你调用任意程序,就好象是一个函数一般
"""
#git(status)
# long-list the current directory
print ls("-l")
# length of the longest line (-L) of this file
print wc(__file__,"-L")
def _total_nodes():
    """Count running containers built from the hectcastro/riak-cs image
    (`docker ps` with an ancestor filter, one ID per line)."""
    docker = sh.Command('docker')
    container_ids = docker('ps', '--filter', 'ancestor=hectcastro/riak-cs', '-q')
    return int(sh.wc(container_ids, '-l'))
def count(self):
    """Line count of self.path: first field of `wc -l`'s raw stdout."""
    raw_output = sh.wc('-l', self.path).stdout
    return int(raw_output.split()[0])
def num_commits(repo_dir):
    """Count first-parent commits in the repository at repo_dir
    (`git -C <dir> log --oneline --first-parent | wc -l`)."""
    repo = git.bake('-C', os.path.expanduser(repo_dir))
    history = repo.log('--oneline', '--first-parent')
    return int(wc(history, '-l'))