Example #1
def update_md5(s):
    '''
    updates md5sums for all files in the database that are missing one

    Parameters
    ----------
    s | object: session object
    '''
    table = pdbi.File
    FILEs = s.query(table).filter_by(md5sum=None).all()
    for FILE in FILEs:
        source = FILE.source
        timestamp = datetime.datetime.utcnow()
        md5_dict = {
            'md5sum': file_data.calc_md5sum(FILE.host, source),
            'timestamp': timestamp
        }
        for field, value in md5_dict.items():
            setattr(FILE, field, value)

        log_data = {
            'action': 'update md5sum',
            'table': 'File',
            'identifier': source,
            'log_id': str(uuid.uuid4()),
            'timestamp': timestamp
        }
        s.add(pdbi.Log(**log_data))
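The md5 helpers here run against a SQLAlchemy-style session: s.query(...).filter_by(md5sum=None) selects the rows missing a checksum, and setattr copies the new values onto the mapped objects. The paperdata pdbi models are not shown on this page, so the sketch below reproduces just that select-and-update pattern against a throwaway in-memory model; the File class, the placeholder checksum, and the SQLite engine are illustrative assumptions, not paperdata code.

import datetime

from sqlalchemy import create_engine, Column, DateTime, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class File(Base):
    # throwaway stand-in for pdbi.File, with just enough columns for the demo
    __tablename__ = 'file'
    id = Column(Integer, primary_key=True)
    source = Column(String)
    md5sum = Column(String)
    timestamp = Column(DateTime)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
s = sessionmaker(bind=engine)()

s.add(File(source='host:/data/zen.2456400.12345.uv'))
s.commit()

# same shape as update_md5: find rows with no md5sum, fill fields from a dict
for FILE in s.query(File).filter_by(md5sum=None).all():
    md5_dict = {'md5sum': 'placeholder-checksum',  # real code calls file_data.calc_md5sum
                'timestamp': datetime.datetime.utcnow()}
    for field, value in md5_dict.items():
        setattr(FILE, field, value)
s.commit()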
Example #2
    def test_md5(self):
        c_md5 = file_data.get_md5sum(self.uv_file)
        md5 = '3187fa8ca0bd000ac984bc33369363b8'
        self.assertEqual(c_md5, md5, msg='md5sum generated is wrong')

        nc_md5 = file_data.calc_md5sum('folio.sas.upenn.edu', self.uv_file)
        self.assertEqual(nc_md5, md5, msg='md5sum generated is wrong')
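The helpers under test, file_data.get_md5sum and file_data.calc_md5sum, are not shown here, and Miriad uv datasets are actually directories, so the real implementations presumably hash the directory contents (calc_md5sum apparently can also do so against a named host). For a single local file, a chunked md5 with the standard library would look roughly like the sketch below; the function name is hypothetical.

import hashlib

def local_md5sum(path, chunk_size=1 << 20):
    # stream the file through hashlib.md5 one chunk at a time so large
    # files never have to fit in memory
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()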
Example #4
def update_md5(s):
    '''
    updates md5sums for all files in the database that are missing one

    Parameters
    ----------
    s | object: session object
    '''
    table = ddbi.File
    FILEs = s.query(table).filter_by(md5sum=None).all()
    for FILE in FILEs:
        FILE.md5sum = file_data.calc_md5sum(FILE.host, FILE.filename)
Example #5
def update_md5(s):
    '''
    updates md5sums for all files in the database that are missing one

    Parameters
    ----------
    s | object: session object
    '''
    table = pdbi.File
    FILEs = s.query(table).filter_by(md5sum=None).all()
    for FILE in FILEs:
        source = FILE.source
        timestamp = datetime.datetime.utcnow()
        md5_dict = {'md5sum': file_data.calc_md5sum(FILE.host, source),
                    'timestamp': timestamp}
        for field, value in md5_dict.items():
            setattr(FILE, field, value)

        log_data = {'action': 'update md5sum',
                    'table': 'File',
                    'identifier': source,
                    'log_id': str(uuid.uuid4()),
                    'timestamp': timestamp}
        s.add(pdbi.Log(**log_data))
Example #6
def add_data(s, sess):
    '''
    transfers data from the paperdistiller database to populate the paperdata tables

    Parameters
    ----------
    s | object: distiller session object
    sess | object: data session object

    Returns
    -------
    dict: movable paths for each filetype
    '''
    obs_table = pdbi.Observation
    file_table = pdbi.File
    log_table = pdbi.Log

    raw_OBSs = get_observations(s)

    # keep a dict of lists of files to move, one per filetype
    movable_paths = {'uv': [], 'uvcRRE': [], 'npz': []}

    named_host = socket.gethostname()
    for OBS in raw_OBSs:
        table = ddbi.File
        FILE = s.query(table).filter(table.obsnum == OBS.obsnum).one()

        host = FILE.host
        path = FILE.filename
        base_path, filename, filetype = file_data.file_names(path)
        source = ':'.join((host, path))

        julian_date = OBS.julian_date
        polarization = 'all' if julian_date < 2456400 else OBS.pol

        if host == named_host:
            try:
                uv = A.miriad.UV(path)
                time_start, time_end, delta_time, _ = uv_data.calc_times(uv)
            except Exception:
                # skip local files that cannot be opened as Miriad UV data
                continue
        else:
            time_start, time_end, delta_time, _, _, _, _ = uv_data.calc_uv_data(host, path)

        era, julian_day, lst = uv_data.date_info(julian_date)

        md5 = FILE.md5sum
        if md5 is None:
            md5 = file_data.calc_md5sum(host, path)

        timestamp = datetime.datetime.utcnow()

        obs_info = {'obsnum': OBS.obsnum,
                    'julian_date': julian_date,
                    'polarization': polarization,
                    'julian_day': julian_day,
                    'lst': lst,
                    'era': era,
                    'era_type': None,
                    'length': OBS.length,
                    'time_start': time_start,
                    'time_end': time_end,
                    'delta_time': delta_time,
                    'prev_obs': None,
                    'next_obs': None,
                    'is_edge': None,
                    'timestamp': timestamp}

        raw_info = {'host': host,
                    'base_path': base_path,
                    'filename': filename,
                    'filetype': filetype,
                    'source': source,
                    'obsnum': OBS.obsnum,
                    'filesize': file_data.calc_size(host, path),
                    'md5sum': md5,
                    'tape_index': None,
                    'init_host': host,
                    'is_tapeable': True,
                    'is_deletable': False,
                    'timestamp': timestamp}

        log_info = {'action': 'add by bridge',
                    'table': None,
                    'identifier': source,
                    'log_id': str(uuid.uuid4()),
                    'timestamp': timestamp}

        sess.add(obs_table(**obs_info))
        sess.add(file_table(**raw_info))
        sess.add(log_table(**log_info))
        movable_paths[filetype].append(path)

        compr_filename = ''.join((filename, 'cRRE'))
        compr_path = os.path.join(base_path, compr_filename)
        if os.path.isdir(compr_path):
            compr_filetype = 'uvcRRE'
            compr_info = copy.deepcopy(raw_info)
            compr_entry = {'filename': compr_filename,
                           'filetype': compr_filetype,
                           'filesize': file_data.calc_size(host, base_path, compr_filename),
                           'md5sum': file_data.calc_md5sum(host, base_path, compr_filename),
                           'is_tapeable': False}
            compr_info.update(compr_entry)
            sess.add(file_table(**compr_info))
            movable_paths[compr_filetype].append(compr_path)

        npz_filename = ''.join((filename, 'cRE.npz'))
        npz_path = os.path.join(base_path, npz_filename)
        if os.path.isfile(npz_path):
            npz_filetype = 'npz'
            npz_info = copy.deepcopy(raw_info)
            npz_entry = {'filename': npz_filename,
                         'filetype': npz_filetype,
                         'filesize': file_data.calc_size(host, base_path, npz_filename),
                         'md5sum': file_data.calc_md5sum(host, base_path, npz_filename),
                         'is_tapeable': False}
            npz_info.update(npz_entry)
            sess.add(file_table(**npz_info))
            movable_paths[npz_filetype].append(npz_path)

    return movable_paths
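For the derived uvcRRE and npz products, add_data clones the raw file's record with copy.deepcopy and overrides only the fields that differ. A minimal, self-contained illustration of that clone-and-override step follows; the host, paths, sizes, and checksums are placeholders rather than values from paperdata.

import copy
import os

raw_info = {'host': 'folio.sas.upenn.edu',
            'base_path': '/data',
            'filename': 'zen.2456400.12345.uv',
            'filetype': 'uv',
            'filesize': 1024,          # placeholder; real code asks file_data.calc_size
            'md5sum': 'placeholder',   # placeholder; real code asks file_data.calc_md5sum
            'is_tapeable': True}

# derive the compressed entry: same record with a handful of overridden fields
compr_filename = ''.join((raw_info['filename'], 'cRRE'))
compr_info = copy.deepcopy(raw_info)
compr_info.update({'filename': compr_filename,
                   'filetype': 'uvcRRE',
                   'filesize': 2048,
                   'md5sum': 'placeholder-2',
                   'is_tapeable': False})

print(os.path.join(raw_info['base_path'], compr_info['filename']))
# -> /data/zen.2456400.12345.uvcRRE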
Example #7
def calc_obs_info(s, host, path):
    '''
    generates all relevant data from a uv* file

    Parameters
    ----------
    s | object: session object
    host | str: host of system
    path | str: path of uv* file

    Returns
    -------
    tuple:
        dict: observation values
        dict: file values
        dict: log values
    '''
    base_path, filename, filetype = file_data.file_names(path)
    source = ':'.join((host, path))

    if filetype in ('uv', 'uvcRRE'):
        time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_data.calc_uv_data(host, path)
    elif filetype in ('npz',):
        time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_data.calc_npz_data(s, filename)

    era, julian_day, lst = uv_data.date_info(julian_date)

    timestamp = datetime.datetime.utcnow()

    obs_info = {'obsnum': obsnum,
                'julian_date': julian_date,
                'polarization': polarization,
                'julian_day': julian_day,
                'lst': lst,
                'era': era,
                'era_type': None,
                'length': length,
                'time_start': time_start,
                'time_end': time_end,
                'delta_time': delta_time,
                'prev_obs': None, 
                'next_obs': None,
                'is_edge': None,
                'timestamp': timestamp}

    file_info = {'host': host,
                 'base_path': base_path,
                 'filename': filename,
                 'filetype': filetype,
                 'source': source,
                 'obsnum': obsnum,
                 'filesize': file_data.calc_size(host, path),
                 'md5sum': file_data.calc_md5sum(host, path),
                 'tape_index': None,
                 'init_host': host,
                 'is_tapeable': False,
                 'is_deletable': False,
                 'timestamp': timestamp}

    log_info = {'action': 'add by scan',
                'table': None,
                'identifier': source,
                'log_id': str(uuid.uuid4()),
                'timestamp': timestamp}

    return obs_info, file_info, log_info
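file_data.file_names is not shown in any of these examples. Judging from how its results are used here (source is host:path, and filetype is compared against 'uv', 'uvcRRE', and 'npz'), it appears to split a path into the containing directory, the basename, and a type tag taken from the extension. The stand-in below is a hypothetical reconstruction for illustration only, not the paperdata implementation.

import os

def file_names_sketch(path):
    # split '/data/zen.2456400.12345.uvcRRE' into
    # ('/data', 'zen.2456400.12345.uvcRRE', 'uvcRRE')
    base_path, filename = os.path.split(path)
    filetype = filename.split('.')[-1]
    return base_path, filename, filetype

print(file_names_sketch('/data/zen.2456400.12345.uvcRRE'))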
Example #8
def add_data(s, sess):
    '''
    transfers data from the paperdistiller database to populate the paperdata tables

    Parameters
    ----------
    s | object: distiller session object
    sess | object: data session object

    Returns
    -------
    dict: movable paths for each filetype
    '''
    obs_table = pdbi.Observation
    file_table = pdbi.File
    log_table = pdbi.Log

    raw_OBSs = get_observations(s)

    # keep a dict of lists of files to move, one per filetype
    movable_paths = {'uv': [], 'uvcRRE': [], 'npz': []}

    named_host = socket.gethostname()
    for OBS in raw_OBSs:
        table = ddbi.File
        FILE = s.query(table).filter(table.obsnum == OBS.obsnum).one()

        host = FILE.host
        path = FILE.filename
        base_path, filename, filetype = file_data.file_names(path)
        source = ':'.join((host, path))

        julian_date = OBS.julian_date
        polarization = 'all' if julian_date < 2456400 else OBS.pol

        if host == named_host:
            try:
                uv = A.miriad.UV(path)
                time_start, time_end, delta_time, _ = uv_data.calc_times(uv)
            except Exception:
                # skip local files that cannot be opened as Miriad UV data
                continue
        else:
            time_start, time_end, delta_time, _, _, _, _ = uv_data.calc_uv_data(
                host, path)

        era, julian_day, lst = uv_data.date_info(julian_date)

        md5 = FILE.md5sum
        if md5 is None:
            md5 = file_data.calc_md5sum(host, path)

        timestamp = datetime.datetime.utcnow()

        obs_info = {
            'obsnum': OBS.obsnum,
            'julian_date': julian_date,
            'polarization': polarization,
            'julian_day': julian_day,
            'lst': lst,
            'era': era,
            'era_type': None,
            'length': OBS.length,
            'time_start': time_start,
            'time_end': time_end,
            'delta_time': delta_time,
            'prev_obs': None,
            'next_obs': None,
            'is_edge': None,
            'timestamp': timestamp
        }

        raw_info = {
            'host': host,
            'base_path': base_path,
            'filename': filename,
            'filetype': filetype,
            'source': source,
            'obsnum': OBS.obsnum,
            'filesize': file_data.calc_size(host, path),
            'md5sum': md5,
            'tape_index': None,
            'init_host': host,
            'is_tapeable': True,
            'is_deletable': False,
            'timestamp': timestamp
        }

        log_info = {
            'action': 'add by bridge',
            'table': None,
            'identifier': source,
            'log_id': str(uuid.uuid4()),
            'timestamp': timestamp
        }

        sess.add(obs_table(**obs_info))
        sess.add(file_table(**raw_info))
        sess.add(log_table(**log_info))
        movable_paths[filetype].append(path)

        compr_filename = ''.join((filename, 'cRRE'))
        compr_path = os.path.join(base_path, compr_filename)
        if os.path.isdir(compr_path):
            compr_filetype = 'uvcRRE'
            compr_info = copy.deepcopy(raw_info)
            compr_entry = {
                'filename': compr_filename,
                'filetype': compr_filetype,
                'filesize': file_data.calc_size(host, base_path,
                                                compr_filename),
                'md5sum': file_data.calc_md5sum(host, base_path,
                                                compr_filename),
                'is_tapeable': False
            }
            compr_info.update(compr_entry)
            sess.add(file_table(**compr_info))
            movable_paths[compr_filetype].append(compr_path)

        npz_filename = ''.join((filename, 'cRE.npz'))
        npz_path = os.path.join(base_path, npz_filename)
        if os.path.isfile(npz_path):
            npz_filetype = 'npz'
            npz_info = copy.deepcopy(raw_info)
            npz_entry = {
                'filename': npz_filename,
                'filetype': npz_filetype,
                'filesize': file_data.calc_size(host, base_path, npz_filename),
                'md5sum': file_data.calc_md5sum(host, base_path, npz_filename),
                'is_tapeable': False
            }
            npz_info.update(npz_entry)
            sess.add(file_table(**npz_info))
            movable_paths[npz_filetype].append(npz_path)

    return movable_paths
Example #9
def calc_obs_info(s, host, path):
    '''
    generates all relevant data from a uv* file

    Parameters
    ----------
    s | object: session object
    host | str: host of system
    path | str: path of uv* file

    Returns
    -------
    tuple:
        dict: observation values
        dict: file values
        dict: log values
    '''
    base_path, filename, filetype = file_data.file_names(path)
    source = ':'.join((host, path))

    if filetype in ('uv', 'uvcRRE'):
        time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_data.calc_uv_data(
            host, path, username='******')
    elif filetype in ('npz', ):
        time_start, time_end, delta_time, julian_date, polarization, length, obsnum = uv_data.calc_npz_data(
            s, filename, username='******')

    era, julian_day, lst = uv_data.date_info(julian_date)

    timestamp = datetime.datetime.utcnow()

    obs_info = {
        'obsnum': obsnum,
        'julian_date': julian_date,
        'polarization': polarization,
        'julian_day': julian_day,
        'lst': lst,
        'era': era,
        'era_type': None,
        'length': length,
        'time_start': time_start,
        'time_end': time_end,
        'delta_time': delta_time,
        'prev_obs': None,
        'next_obs': None,
        'is_edge': None,
        'timestamp': timestamp
    }

    file_info = {
        'host': host,
        'base_path': base_path,
        'filename': filename,
        'filetype': filetype,
        'source': source,
        'obsnum': obsnum,
        'filesize': file_data.calc_size(host, path, username='******'),
        'md5sum': file_data.calc_md5sum(host, path, username='******'),
        'tape_index': None,
        'init_host': host,
        'is_tapeable': False,
        'is_deletable': False,
        'timestamp': timestamp
    }

    log_info = {
        'action': 'add by scan',
        'table': None,
        'identifier': source,
        'log_id': str(uuid.uuid4()),
        'timestamp': timestamp
    }

    return obs_info, file_info, log_info
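Every example on this page stamps its rows the same way: one uuid4 per log entry and a single UTC timestamp shared by the observation, file, and log records produced in one pass, so they can be correlated later. That bookkeeping on its own is plain standard library; the identifier below is a made-up source string.

import datetime
import uuid

timestamp = datetime.datetime.utcnow()
log_info = {'action': 'add by scan',
            'table': None,
            'identifier': 'folio.sas.upenn.edu:/data/zen.2456400.12345.uv',
            'log_id': str(uuid.uuid4()),
            'timestamp': timestamp}
print(log_info)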