def move_feed_files(s, source_host, source_paths, dest_host, dest_path):
    '''
    moves files and adds to feed directory and table

    Parameters
    ----------
    s | object: session object
    source_host | str: file host
    source_paths | list[str]: file paths
    dest_host | str: output host
    dest_path | str: output directory
    '''
    # different from move_files, adds to feed table
    # CONSISTENCY FIX: normalize the local hostname through pdbi.hostnames
    # (as delete_files/move_files do) so aliased host names compare equal
    named_host = socket.gethostname()
    named_host = pdbi.hostnames.get(named_host, named_host)
    destination = ':'.join((dest_host, dest_path))
    if named_host == source_host:
        for source_path in source_paths:
            ppdata.rsync_copy(source_path, destination)
            set_feed_table(s, source_host, source_path, dest_host, dest_path)
            shutil.rmtree(source_path)
    else:
        with ppdata.ssh_scope(source_host) as ssh:
            for source_path in source_paths:
                rsync_copy_command = 'rsync -ac {source_path} {destination}'.format(
                    source_path=source_path, destination=destination)
                rsync_del_command = 'rm -r {source_path}'.format(
                    source_path=source_path)
                # BUG FIX: exec_command returns immediately; block on the
                # copy's exit status so the remote rm cannot race the rsync
                _, copy_out, _ = ssh.exec_command(rsync_copy_command)
                copy_out.channel.recv_exit_status()
                set_feed_table(s, source_host, source_path, dest_host, dest_path)
                ssh.exec_command(rsync_del_command)
    print('Completed transfer')
def calc_size(host, path, username=None, password=None):
    '''
    calculates size of directory or file on any host
    logins into host if necessary

    Parameters
    ----------
    host | str: host of file
    path | str: path of directory or file
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    float: size of directory or file in MB

    >>> calc_size('folio', '/home/immwa/test_data/zen.2456617.17386.xx.uvcRRE')
    205.2
    '''
    local_name = socket.gethostname()
    if pdbi.hostnames.get(local_name, local_name) == host:
        # local path -- measure directly
        num_bytes = byte_size(path)
    else:
        # remote path -- stat it over sftp
        # NOTE(review): sftp.stat on a directory returns the entry's own
        # st_size, not a recursive total -- confirm callers expect this
        with ppdata.ssh_scope(host, username, password) as ssh:
            with ssh.open_sftp() as sftp:
                num_bytes = sftp.stat(path).st_size
    return human_size(num_bytes)
def calc_md5sum(host, path, username=None, password=None):
    '''
    calculates md5 checksum of directory or file on any host
    logins into host if necessary

    Parameters
    ----------
    host | str: host of file
    path | str: path of directory or file
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    str: md5 checksum

    >>> calc_md5sum('folio', '/home/immwa/test_data/zen.2456617.17386.xx.uvcRRE')
    '7d5ac942dd37c4ddfb99728359e42331'
    '''
    local_name = socket.gethostname()
    # local file: hash it directly and return early
    if pdbi.hostnames.get(local_name, local_name) == host:
        return get_md5sum(path)
    with ppdata.ssh_scope(host, username, password) as ssh:
        try:
            # hash the remote file in place via paramiko's check extension
            with ssh.open_sftp() as sftp:
                with sftp.file(path, mode='r') as remote_path:
                    md5 = remote_path.check('md5', block_size=65536)
        except IOError:
            # fall back to shelling out for the path's visdata file
            # NOTE(review): IOError also covers servers lacking check()
            vis_path = os.path.join(path, 'visdata')
            _, md5_out, _ = ssh.exec_command(
                'md5sum {vis_path}'.format(vis_path=vis_path))
            md5 = md5_out.read().split()[0]
    return md5
def move_feed_files(s, source_host, source_paths, dest_host, dest_path):
    '''
    moves files and adds to feed directory and table

    Parameters
    ----------
    s | object: session object
    source_host | str: file host
    source_paths | list[str]: file paths
    dest_host | str: output host
    dest_path | str: output directory
    '''
    # unlike move_files, each moved file is also recorded in the feed table
    local_name = socket.gethostname()
    destination = ':'.join((dest_host, dest_path))
    if local_name != source_host:
        # remote source: issue rsync + rm on the source host over ssh
        with ppdata.ssh_scope(source_host) as ssh:
            for path in source_paths:
                copy_comm = 'rsync -ac {source_path} {destination}'.format(
                    source_path=path, destination=destination)
                del_comm = 'rm -r {source_path}'.format(source_path=path)
                ssh.exec_command(copy_comm)
                set_feed_table(s, source_host, path, dest_host, dest_path)
                ssh.exec_command(del_comm)
    else:
        # local source: copy then remove the originals
        for path in source_paths:
            ppdata.rsync_copy(path, destination)
            set_feed_table(s, source_host, path, dest_host, dest_path)
            shutil.rmtree(path)
    print('Completed transfer')
def delete_files(s, source_host, source_paths, dest_host, dest_path):
    '''
    copies files to new directory and deletes old file
    destination path should be periodically deleted manually
    NO deletion of files UNLESS manually

    Parameters
    ----------
    s | object: session object
    source_host | str: user host
    source_paths | list[str]: file paths
    dest_host | str: output host
    dest_path | str: output directory
    '''
    destination = ':'.join((dest_host, dest_path))
    out_host = socket.gethostname()
    if source_host == pdbi.hostnames.get(out_host, out_host):
        for source_path in source_paths:
            ppdata.rsync_copy(source_path, destination)
            set_delete_table(s, source_host, source_path, dest_host, dest_path)
            # BUG FIX: original called shutil.rmtree(source) -- `source` is
            # not defined in this scope and raised NameError on the first file
            shutil.rmtree(source_path)
    else:
        with ppdata.ssh_scope(source_host) as ssh:
            for source_path in source_paths:
                rsync_copy_command = 'rsync -ac {source_path} {destination}'.format(
                    source_path=source_path, destination=destination)
                rsync_del_command = 'rm -r {source_path}'.format(
                    source_path=source_path)
                ssh.exec_command(rsync_copy_command)
                set_delete_table(s, source_host, source_path, dest_host, dest_path)
                ssh.exec_command(rsync_del_command)
    print('Completed transfer')
def add_data(s, host):
    '''
    generates table information for all tables

    Parameters
    ----------
    s | object: session object
    host | str: host of filesystem
    '''
    with ppdata.ssh_scope(host) as ssh:
        # one row per device reported by iostat
        for device_stats in iostat(ssh, host).values():
            s.add(pdbi.Iostat(**device_stats))
        # single snapshot of free ram
        s.add(pdbi.Ram(**ram_free(ssh, host)))
        # one row per cpu reported by cpu_perc
        for core_stats in cpu_perc(ssh, host).values():
            s.add(pdbi.Cpu(**core_stats))
        # filesystem stats are only collected for folio's data disks
        if host in ('folio', ):
            for mount in ('/data3', '/data4'):
                s.add(pdbi.Filesystem(**filesystem(ssh, mount)))
def add_data(s, host):
    '''
    generates table information for all tables

    Parameters
    ----------
    s | object: session object
    host | str: host of filesystem
    '''
    with ppdata.ssh_scope(host) as ssh:
        # disk activity, one row per reported device
        for _, disk_stats in iostat(ssh, host).items():
            s.add(pdbi.Iostat(**disk_stats))
        # memory snapshot
        mem_stats = ram_free(ssh, host)
        s.add(pdbi.Ram(**mem_stats))
        # cpu usage, one row per core entry
        for _, core_stats in cpu_perc(ssh, host).items():
            s.add(pdbi.Cpu(**core_stats))
        # only folio carries the tracked data mounts
        if host in ('folio',):
            data_mounts = ('/data3', '/data4')
            for data_mount in data_mounts:
                fs_stats = filesystem(ssh, data_mount)
                s.add(pdbi.Filesystem(**fs_stats))
def update_sources(s, username=None):
    '''
    fixes database files and directories that have been moved/deleted

    Parameters
    ----------
    s | object: session object
    username | str: username for remote ssh logins --defaults to None
    '''
    source_host = socket.gethostname()
    table = pdbi.File
    hosts = pdbi.hostnames.values()
    for host in hosts:
        FILEs = s.query(table).filter_by(host=host).all()
        if source_host == host:
            # local host: check each recorded path on disk directly
            for FILE in FILEs:
                if not os.path.exists(FILE.source):
                    print(
                        "deleting host, base path, filename: {} {} {}".format(
                            host, FILE.base_path, FILE.filename))
                    s.delete(FILE)
        else:
            # remote host: check each recorded path over sftp
            try:
                with ppdata.ssh_scope(host, username=username) as ssh:
                    with ssh.open_sftp() as sftp:
                        for FILE in FILEs:
                            if not path_exists(sftp, FILE.source):
                                print(
                                    "deleting host, base path, filename: {} {} {}"
                                    .format(host, FILE.base_path, FILE.filename))
                                s.delete(FILE)
            except paramiko.ssh_exception.AuthenticationException:
                # cannot log in to this host -- skip it rather than abort
                # (the original trailing `except: raise` was a no-op and
                # has been removed; unhandled exceptions propagate anyway)
                continue
def calc_size(host, path, username=None, password=None):
    '''
    calculates size of directory or file on any host
    logins into host if necessary

    Parameters
    ----------
    host | str: host of file
    path | str: path of directory or file
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    float: size of directory or file in MB

    >>> calc_size('folio', '/home/immwa/test_data/zen.2456617.17386.xx.uvcRRE')
    205.2
    '''
    local_name = socket.gethostname()
    if pdbi.hostnames.get(local_name, local_name) == host:
        # local path -- measure directly
        size_bytes = int(byte_size(path)) if False else byte_size(path)
    else:
        # remote path -- sum it with du over ssh
        du_comm = ' '.join(["du -s -b", path])
        with ppdata.ssh_scope(host, username, password) as ssh:
            _, du_out, _ = ssh.exec_command(du_comm)
            # first whitespace-delimited field of `du -s -b` is the byte total
            size_bytes = int(du_out.read().split()[0])
    return human_size(size_bytes)
def parse_sources(source_host, source_paths_str, username=None, password=None):
    '''
    parses source path string and gets list of sources

    Parameters
    ----------
    source_host | str: host of files
    source_paths_str | str: string to indicate paths of uv* files,
        expands like unix filesystem syntax
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    list[str]: sorted list of source paths
    '''
    local_name = socket.gethostname()
    if source_host == pdbi.hostnames.get(local_name, local_name):
        # local host: let glob expand the pattern
        source_paths = glob.glob(source_paths_str)
    else:
        # remote host: expand the pattern with `ls -d` over ssh
        ls_comm = 'ls -d {source_paths_str}'.format(
            source_paths_str=source_paths_str)
        with ppdata.ssh_scope(source_host, username, password) as ssh:
            _, path_out, _ = ssh.exec_command(ls_comm)
            # BUG FIX: splitlines() yields no trailing empty entry for
            # newline-terminated output, so the original [:-1] slice
            # silently dropped the last matching path
            source_paths = path_out.read().splitlines()
    return sorted(source_paths)
def calc_md5sum(host, path, username=None, password=None):
    '''
    calculates md5 checksum of directory or file on any host
    logins into host if necessary

    Parameters
    ----------
    host | str: host of file
    path | str: path of directory or file
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    str: md5 checksum

    >>> calc_md5sum('folio', '/home/immwa/test_data/zen.2456617.17386.xx.uvcRRE')
    '7d5ac942dd37c4ddfb99728359e42331'
    '''
    this_host = socket.gethostname()
    named_host = pdbi.hostnames.get(this_host, this_host)
    if named_host == host:
        checksum = get_md5sum(path)
    else:
        with ppdata.ssh_scope(host, username, password) as ssh:
            try:
                # hash the remote file in place via sftp check()
                with ssh.open_sftp() as sftp:
                    with sftp.file(path, mode='r') as remote_file:
                        checksum = remote_file.check('md5', block_size=65536)
            except IOError:
                # fall back to md5sum of the path's visdata file
                # NOTE(review): IOError also fires when the sftp server
                # does not implement check() -- confirm both cases are ok
                vis_file = os.path.join(path, 'visdata')
                _, md5_out, _ = ssh.exec_command(
                    'md5sum {vis_path}'.format(vis_path=vis_file))
                checksum = md5_out.read().split()[0]
    return checksum
def move_files(s, source_host, source_paths_str, dest_host, dest_path, username, password):
    '''
    move files by rsyncing them and checking md5sum through rsync option

    Parameters
    ----------
    s | object: session object
    source_host | str: file host
    source_paths_str | str: file paths -- can include wildcard,
        expanded alike unix filesystem syntax
    dest_host | str: output host
    dest_path | str: output directory
    username | str: username
    password | str: password
    '''
    source_paths = file_data.parse_sources(source_host, source_paths_str,
                                           username, password)
    is_existent = exist_check(s, source_host, source_paths)
    if not is_existent:
        print('File(s) not in database')
        return
    # destination takes the form user@host:path for rsync
    dest = ':'.join((dest_host, dest_path))
    destination = '@'.join((username, dest))
    out_host = socket.gethostname()
    if source_host == pdbi.hostnames.get(out_host, out_host):
        for source_path in source_paths:
            ppdata.rsync_copy(source_path, destination)
            set_move_table(s, source_host, source_path, dest_host, dest_path)
            shutil.rmtree(source_path)
    else:
        with ppdata.ssh_scope(source_host, username, password) as ssh:
            for source_path in source_paths:
                rsync_copy_command = 'rsync -ac {source_path} {destination}'.format(
                    source_path=source_path, destination=destination)
                rsync_del_command = 'rm -r {source_path}'.format(
                    source_path=source_path)
                # BUG FIX: exec_command returns immediately; block on the
                # copy's exit status so the remote rm cannot race the rsync
                _, copy_out, _ = ssh.exec_command(rsync_copy_command)
                copy_out.channel.recv_exit_status()
                set_move_table(s, source_host, source_path, dest_host, dest_path)
                ssh.exec_command(rsync_del_command)
    print('Completed transfer')
def move_files(s, source_host, source_paths_str, dest_host, dest_path, username, password):
    '''
    move files by rsyncing them and checking md5sum through rsync option

    Parameters
    ----------
    s | object: session object
    source_host | str: file host
    source_paths_str | str: file paths -- can include wildcard,
        expanded alike unix filesystem syntax
    dest_host | str: output host
    dest_path | str: output directory
    username | str: username
    password | str: password
    '''
    source_paths = file_data.parse_sources(source_host, source_paths_str,
                                           username, password)
    # bail out early when any file is unknown to the database
    if not exist_check(s, source_host, source_paths):
        print('File(s) not in database')
        return
    # rsync destination in user@host:path form
    destination = '@'.join((username, ':'.join((dest_host, dest_path))))
    this_host = socket.gethostname()
    if source_host == pdbi.hostnames.get(this_host, this_host):
        # local source: copy, record, then remove the original
        for path in source_paths:
            ppdata.rsync_copy(path, destination)
            set_move_table(s, source_host, path, dest_host, dest_path)
            shutil.rmtree(path)
    else:
        # remote source: run rsync + rm on the source host over ssh
        with ppdata.ssh_scope(source_host, username, password) as ssh:
            for path in source_paths:
                copy_comm = 'rsync -ac {source_path} {destination}'.format(
                    source_path=path, destination=destination)
                del_comm = 'rm -r {source_path}'.format(source_path=path)
                ssh.exec_command(copy_comm)
                set_move_table(s, source_host, path, dest_host, dest_path)
                ssh.exec_command(del_comm)
    print('Completed transfer')
def update_sources(s, username=None):
    '''
    fixes database files and directories that have been moved/deleted

    Parameters
    ----------
    s | object: session object
    username | str: username for remote ssh logins --defaults to None
        (added for consistency with the username-aware variant; default
        None preserves the original calling convention)
    '''
    source_host = socket.gethostname()
    table = pdbi.File
    hosts = pdbi.hostnames.values()
    for host in hosts:
        FILEs = s.query(table).filter_by(host=host).all()
        if source_host == host:
            # local host: verify each recorded path on disk
            for FILE in FILEs:
                if not os.path.exists(FILE.source):
                    s.delete(FILE)
        else:
            # remote host: verify each recorded path over sftp
            with ppdata.ssh_scope(host, username=username) as ssh:
                with ssh.open_sftp() as sftp:
                    for FILE in FILEs:
                        if not path_exists(sftp, FILE.source):
                            s.delete(FILE)
def parse_sources(source_host, source_paths_str, username=None, password=None):
    '''
    parses source path string and gets list of sources

    Parameters
    ----------
    source_host | str: host of files
    source_paths_str | str: string to indicate paths of uv* files,
        expands like unix filesystem syntax
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    list[str]: sorted list of source paths
    '''
    this_host = socket.gethostname()
    if source_host == pdbi.hostnames.get(this_host, this_host):
        # local host: glob expands the pattern directly
        matched = glob.glob(source_paths_str)
    else:
        # remote host: expand the pattern with `ls -d` over ssh
        ls_comm = 'ls -d {source_paths_str}'.format(
            source_paths_str=source_paths_str)
        with ppdata.ssh_scope(source_host, username, password) as ssh:
            _, ls_out, _ = ssh.exec_command(ls_comm)
            # NOTE(review): [:-1] discards the final splitlines() entry,
            # but splitlines() produces no trailing empty element --
            # confirm the last path is meant to be dropped
            matched = ls_out.read().splitlines()[:-1]
    return sorted(matched)
def calc_uv_data(host, path, username=None, password=None):
    '''
    takes in uv* files and pulls data about observation

    Parameters
    ----------
    host | str: host of system
    path | str: path of uv* file
    username | str: username --defaults to None
    password | str: password --defaults to None

    Returns
    -------
    tuple:
        float(5): time start
        float(5): time end
        float(5): delta time
        float(5): julian date
        str: polarization
        float(5): length
        int: obsnum of uv file observation
    OR
    tuple: None for every field if no corresponding observation found

    >>> calc_uv_data('folio', '/home/immwa/test_data/zen.2456617.17386.xx.uvcRRE')
    (2456617.17386, 2456617.18032, 0.0005, 2456617.18069, 'xx', 0.00696, 21480813086)
    '''
    # normalize the local hostname through the alias map before comparing
    named_host = pdbi.hostnames.get(socket.gethostname(), socket.gethostname())
    if named_host == host:
        # local file: read the miriad dataset directly
        filetype = path.split('.')[-1]
        if filetype not in ('uv', 'uvcRRE'):
            # not a uv file -- signal "no observation"
            return (None,) * 7
        else:
            try:
                uv = A.miriad.UV(path)
            except:
                # unreadable/corrupt dataset -- signal "no observation"
                return (None,) * 7
        time_start, time_end, delta_time, length = calc_times(uv)
        julian_date = five_round(uv['time'])
        # single-pol files map through pol_to_str, 4-pol files are 'all';
        # any other npol leaves polarization as None
        polarization = pdbi.pol_to_str[uv['pol']] if uv['npol'] == 1 else 'all' if uv['npol'] == 4 else None
        # zero-length observations get no obsnum
        obsnum = jdpol_to_obsnum(julian_date, polarization, length) if length > 0 else None
    else:
        # remote file: ship a helper script to the remote host and parse
        # its comma-separated stdout
        uv_data_script = os.path.expanduser('~/paperdata/paper/data/uv_data.py')
        moved_script = './uv_data.py'
        uv_comm = 'python {moved_script} {host}:{path}'.format(moved_script=moved_script, host=host, path=path)
        virt_env = 'source /usr/global/paper/CanopyVirtualEnvs/PAPER_Distiller/bin/activate'
        with ppdata.ssh_scope(host, username, password) as ssh:
            with ssh.open_sftp() as sftp:
                # upload the helper only if neither copy already exists
                # on the remote side
                try:
                    filestat = sftp.stat(uv_data_script)
                except IOError:
                    try:
                        filestat = sftp.stat(moved_script)
                    except IOError:
                        sftp.put(uv_data_script, moved_script)
            # activate the remote virtualenv, then run the helper
            # NOTE(review): each exec_command runs in its own shell, so the
            # activate may not affect uv_comm -- confirm intended
            _, _, _ = ssh.exec_command(virt_env)
            _, uv_dat, _ = ssh.exec_command(uv_comm)
            uv_get = uv_dat.read()
            # fields 0-3 and 5 are rounded floats, field 6 is the integer
            # obsnum, field 4 (polarization) stays a raw string
            time_start, time_end, delta_time,\
            julian_date, polarization, length, obsnum = [five_round(float(info)) if key in (0, 1, 2, 3, 5) else
                                                         int(info) if key in (6,) else info
                                                         for key, info in enumerate(uv_get.split(','))]
    return time_start, time_end, delta_time, julian_date, polarization, length, obsnum