def update_md5(s):
    '''
    updates md5sums for all files without in database

    Parameters
    ----------
    s | object: session object
    '''
    # every File row whose md5sum column is still NULL
    pending = s.query(pdbi.File).filter_by(md5sum=None).all()
    for db_file in pending:
        file_source = db_file.source
        now = datetime.datetime.utcnow()
        # compute the checksum on the file's host and stamp the row
        db_file.md5sum = file_data.calc_md5sum(db_file.host, file_source)
        db_file.timestamp = now
        # record the update in the Log table
        s.add(pdbi.Log(action='update md5sum',
                       table='File',
                       identifier=file_source,
                       log_id=str(uuid.uuid4()),
                       timestamp=now))
def test_md5(self):
    # checksum of the known uv test file must match the recorded value,
    # both via the local reader and the host-aware calculator
    expected = '3187fa8ca0bd000ac984bc33369363b8'
    local_md5 = file_data.get_md5sum(self.uv_file)
    self.assertEqual(local_md5, expected, msg='md5sum generated is wrong')
    remote_md5 = file_data.calc_md5sum('folio.sas.upenn.edu', self.uv_file)
    self.assertEqual(remote_md5, expected, msg='md5sum generated is wrong')
def update_md5(s):
    '''
    updates md5sums for all files without in database

    Parameters
    ----------
    s | object: session object
    '''
    # restrict to File rows that have no checksum yet and fill each one in
    for db_file in s.query(ddbi.File).filter_by(md5sum=None).all():
        db_file.md5sum = file_data.calc_md5sum(db_file.host, db_file.filename)
def update_md5(s):
    '''
    updates md5sums for all files without in database

    Parameters
    ----------
    s | object: session object
    '''
    missing = s.query(pdbi.File).filter_by(md5sum=None).all()
    for FILE in missing:
        src = FILE.source
        stamp = datetime.datetime.utcnow()
        # fill in the missing checksum and bump the row timestamp
        FILE.md5sum = file_data.calc_md5sum(FILE.host, src)
        FILE.timestamp = stamp
        # log the change so the update is auditable
        log_row = pdbi.Log(action='update md5sum',
                           table='File',
                           identifier=src,
                           log_id=str(uuid.uuid4()),
                           timestamp=stamp)
        s.add(log_row)
def add_data(s, sess):
    '''
    transfer data from paperdistiller database to create data for paperdata tables

    Parameters
    ----------
    s | object: distiller session object
    sess | object: data session object

    Returns
    -------
    dict: movable paths for each filetype
    '''
    obs_table = pdbi.Observation
    file_table = pdbi.File
    log_table = pdbi.Log
    raw_OBSs = get_observations(s)
    # need to keep dict of list of files to move of each type
    movable_paths = {'uv': [], 'uvcRRE': [], 'npz': []}
    named_host = socket.gethostname()
    for OBS in raw_OBSs:
        table = ddbi.File
        FILE = s.query(table).filter(table.obsnum == OBS.obsnum).one()
        host = FILE.host
        path = FILE.filename
        base_path, filename, filetype = file_data.file_names(path)
        source = ':'.join((host, path))
        julian_date = OBS.julian_date
        # polarization is not tracked per-observation before JD 2456400
        polarization = 'all' if julian_date < 2456400 else OBS.pol
        if host == named_host:
            try:
                uv = A.miriad.UV(path)
                time_start, time_end, delta_time, _ = uv_data.calc_times(uv)
            except Exception:
                # unreadable uv file -- skip this observation
                # (was a bare except; narrowed so KeyboardInterrupt propagates)
                continue
        else:
            time_start, time_end, delta_time, _, _, _, _ = uv_data.calc_uv_data(host, path)
        era, julian_day, lst = uv_data.date_info(julian_date)
        # reuse the distiller checksum when present, otherwise compute it
        md5 = FILE.md5sum
        if md5 is None:
            md5 = file_data.calc_md5sum(host, path)
        timestamp = datetime.datetime.utcnow()
        obs_info = {'obsnum': OBS.obsnum,
                    'julian_date': julian_date,
                    'polarization': polarization,
                    'julian_day': julian_day,
                    'lst': lst,
                    'era': era,
                    'era_type': None,
                    'length': OBS.length,
                    'time_start': time_start,
                    'time_end': time_end,
                    'delta_time': delta_time,
                    'prev_obs': None,
                    'next_obs': None,
                    'is_edge': None,
                    'timestamp': timestamp}
        raw_info = {'host': host,
                    'base_path': base_path,
                    'filename': filename,
                    'filetype': filetype,
                    'source': source,
                    'obsnum': OBS.obsnum,
                    'filesize': file_data.calc_size(host, path),
                    'md5sum': md5,
                    'tape_index': None,
                    'init_host': host,
                    'is_tapeable': True,
                    'is_deletable': False,
                    'timestamp': timestamp}
        log_info = {'action': 'add by bridge',
                    'table': None,
                    'identifier': source,
                    'log_id': str(uuid.uuid4()),
                    'timestamp': timestamp}
        sess.add(obs_table(**obs_info))
        # BUG FIX: original passed undefined name `file_info`; the raw file
        # row's values are held in raw_info
        sess.add(file_table(**raw_info))
        sess.add(log_table(**log_info))
        movable_paths[filetype].append(path)
        # companion compressed file (directory), if it exists alongside
        compr_filename = ''.join((filename, 'cRRE'))
        compr_path = os.path.join(base_path, compr_filename)
        if os.path.isdir(compr_path):
            compr_filetype = 'uvcRRE'
            compr_info = copy.deepcopy(raw_info)
            compr_entry = {'filename': compr_filename,
                           'filetype': compr_filetype,
                           'filesize': file_data.calc_size(host, base_path, compr_filename),
                           'md5sum': file_data.calc_md5sum(host, base_path, compr_filename),
                           'is_tapeable': False}
            compr_info.update(compr_entry)
            sess.add(file_table(**compr_info))
            movable_paths[compr_filetype].append(compr_path)
        # companion npz file, if it exists alongside
        npz_filename = ''.join((filename, 'cRE.npz'))
        npz_path = os.path.join(base_path, npz_filename)
        if os.path.isfile(npz_path):
            npz_filetype = 'npz'
            npz_info = copy.deepcopy(raw_info)
            npz_entry = {'filename': npz_filename,
                         'filetype': npz_filetype,
                         'filesize': file_data.calc_size(host, base_path, npz_filename),
                         'md5sum': file_data.calc_md5sum(host, base_path, npz_filename),
                         'is_tapeable': False}
            npz_info.update(npz_entry)
            sess.add(file_table(**npz_info))
            movable_paths[npz_filetype].append(npz_path)
    return movable_paths
def calc_obs_info(s, host, path):
    '''
    generates all relevant data from uv* file

    Parameters
    ----------
    s | object: session object
    host | str: host of system
    path | str: path of uv* file

    Returns
    -------
    tuple:
        dict: observation values
        dict: file values
        dict: log values
    '''
    base_path, filename, filetype = file_data.file_names(path)
    source = ':'.join((host, path))
    # uv/uvcRRE files are read directly; npz metadata comes via the session
    if filetype in ('uv', 'uvcRRE'):
        uv_values = uv_data.calc_uv_data(host, path)
    elif filetype in ('npz',):
        uv_values = uv_data.calc_npz_data(s, filename)
    time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_values
    era, julian_day, lst = uv_data.date_info(julian_date)
    timestamp = datetime.datetime.utcnow()
    obs_info = {'obsnum': obsnum,
                'julian_date': julian_date,
                'polarization': polarization,
                'julian_day': julian_day,
                'lst': lst,
                'era': era,
                'era_type': None,
                'length': length,
                'time_start': time_start,
                'time_end': time_end,
                'delta_time': delta_time,
                'prev_obs': None,
                'next_obs': None,
                'is_edge': None,
                'timestamp': timestamp}
    file_info = {'host': host,
                 'base_path': base_path,
                 'filename': filename,
                 'filetype': filetype,
                 'source': source,
                 'obsnum': obsnum,
                 'filesize': file_data.calc_size(host, path),
                 'md5sum': file_data.calc_md5sum(host, path),
                 'tape_index': None,
                 'init_host': host,
                 'is_tapeable': False,
                 'is_deletable': False,
                 'timestamp': timestamp}
    log_info = {'action': 'add by scan',
                'table': None,
                'identifier': source,
                'log_id': str(uuid.uuid4()),
                'timestamp': timestamp}
    return obs_info, file_info, log_info
def add_data(s, sess):
    '''
    transfer data from paperdistiller database to create data for paperdata tables

    Parameters
    ----------
    s | object: distiller session object
    sess | object: data session object

    Returns
    -------
    dict: movable paths for each filetype
    '''
    obs_table = pdbi.Observation
    file_table = pdbi.File
    log_table = pdbi.Log
    raw_OBSs = get_observations(s)
    # need to keep dict of list of files to move of each type
    movable_paths = {'uv': [], 'uvcRRE': [], 'npz': []}
    named_host = socket.gethostname()
    for OBS in raw_OBSs:
        table = ddbi.File
        FILE = s.query(table).filter(table.obsnum == OBS.obsnum).one()
        host = FILE.host
        path = FILE.filename
        base_path, filename, filetype = file_data.file_names(path)
        source = ':'.join((host, path))
        julian_date = OBS.julian_date
        # polarization is not tracked per-observation before JD 2456400
        polarization = 'all' if julian_date < 2456400 else OBS.pol
        if host == named_host:
            try:
                uv = A.miriad.UV(path)
                time_start, time_end, delta_time, _ = uv_data.calc_times(uv)
            except Exception:
                # unreadable uv file -- skip this observation
                # (was a bare except; narrowed so KeyboardInterrupt propagates)
                continue
        else:
            time_start, time_end, delta_time, _, _, _, _ = uv_data.calc_uv_data(
                host, path)
        era, julian_day, lst = uv_data.date_info(julian_date)
        # reuse the distiller checksum when present, otherwise compute it
        md5 = FILE.md5sum
        if md5 is None:
            md5 = file_data.calc_md5sum(host, path)
        timestamp = datetime.datetime.utcnow()
        obs_info = {
            'obsnum': OBS.obsnum,
            'julian_date': julian_date,
            'polarization': polarization,
            'julian_day': julian_day,
            'lst': lst,
            'era': era,
            'era_type': None,
            'length': OBS.length,
            'time_start': time_start,
            'time_end': time_end,
            'delta_time': delta_time,
            'prev_obs': None,
            'next_obs': None,
            'is_edge': None,
            'timestamp': timestamp
        }
        raw_info = {
            'host': host,
            'base_path': base_path,
            'filename': filename,
            'filetype': filetype,
            'source': source,
            'obsnum': OBS.obsnum,
            'filesize': file_data.calc_size(host, path),
            'md5sum': md5,
            'tape_index': None,
            'init_host': host,
            'is_tapeable': True,
            'is_deletable': False,
            'timestamp': timestamp
        }
        log_info = {
            'action': 'add by bridge',
            'table': None,
            'identifier': source,
            'log_id': str(uuid.uuid4()),
            'timestamp': timestamp
        }
        sess.add(obs_table(**obs_info))
        # BUG FIX: original passed undefined name `file_info`; the raw file
        # row's values are held in raw_info
        sess.add(file_table(**raw_info))
        sess.add(log_table(**log_info))
        movable_paths[filetype].append(path)
        # companion compressed file (directory), if it exists alongside
        compr_filename = ''.join((filename, 'cRRE'))
        compr_path = os.path.join(base_path, compr_filename)
        if os.path.isdir(compr_path):
            compr_filetype = 'uvcRRE'
            compr_info = copy.deepcopy(raw_info)
            compr_entry = {
                'filename': compr_filename,
                'filetype': compr_filetype,
                'filesize': file_data.calc_size(host, base_path, compr_filename),
                'md5sum': file_data.calc_md5sum(host, base_path, compr_filename),
                'is_tapeable': False
            }
            compr_info.update(compr_entry)
            sess.add(file_table(**compr_info))
            movable_paths[compr_filetype].append(compr_path)
        # companion npz file, if it exists alongside
        npz_filename = ''.join((filename, 'cRE.npz'))
        npz_path = os.path.join(base_path, npz_filename)
        if os.path.isfile(npz_path):
            npz_filetype = 'npz'
            npz_info = copy.deepcopy(raw_info)
            npz_entry = {
                'filename': npz_filename,
                'filetype': npz_filetype,
                'filesize': file_data.calc_size(host, base_path, npz_filename),
                'md5sum': file_data.calc_md5sum(host, base_path, npz_filename),
                'is_tapeable': False
            }
            npz_info.update(npz_entry)
            sess.add(file_table(**npz_info))
            movable_paths[npz_filetype].append(npz_path)
    return movable_paths
def calc_obs_info(s, host, path):
    '''
    generates all relevant data from uv* file

    Parameters
    ----------
    s | object: session object
    host | str: host of system
    path | str: path of uv* file

    Returns
    -------
    tuple:
        dict: observation values
        dict: file values
        dict: log values
    '''
    base_path, filename, filetype = file_data.file_names(path)
    source = ':'.join((host, path))
    # uv/uvcRRE files are read directly; npz metadata comes via the session
    if filetype in ('uv', 'uvcRRE'):
        uv_values = uv_data.calc_uv_data(host, path, username='******')
    elif filetype in ('npz',):
        uv_values = uv_data.calc_npz_data(s, filename, username='******')
    time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_values
    era, julian_day, lst = uv_data.date_info(julian_date)
    timestamp = datetime.datetime.utcnow()
    obs_info = {'obsnum': obsnum,
                'julian_date': julian_date,
                'polarization': polarization,
                'julian_day': julian_day,
                'lst': lst,
                'era': era,
                'era_type': None,
                'length': length,
                'time_start': time_start,
                'time_end': time_end,
                'delta_time': delta_time,
                'prev_obs': None,
                'next_obs': None,
                'is_edge': None,
                'timestamp': timestamp}
    file_info = {'host': host,
                 'base_path': base_path,
                 'filename': filename,
                 'filetype': filetype,
                 'source': source,
                 'obsnum': obsnum,
                 'filesize': file_data.calc_size(host, path, username='******'),
                 'md5sum': file_data.calc_md5sum(host, path, username='******'),
                 'tape_index': None,
                 'init_host': host,
                 'is_tapeable': False,
                 'is_deletable': False,
                 'timestamp': timestamp}
    log_info = {'action': 'add by scan',
                'table': None,
                'identifier': source,
                'log_id': str(uuid.uuid4()),
                'timestamp': timestamp}
    return obs_info, file_info, log_info