Code Example #1
File: fixtures.py Project: jiesuncal/cesium_web
def create_test_dataset(project, label_type='class'):
    """Create and yield test labeled dataset, then delete.

    Params
    ------
    project : `models.Project` instance
        The project under which to create test dataset.
    label_type  : str
        String indicating whether data labels are class names ('class')
        for classification, or numerical values for regression (anything other
        than 'class'). Defaults to 'class'.

    """
    if label_type == 'class':
        header = pjoin(os.path.dirname(__file__),
                       'data', 'asas_training_subset_classes.dat')
    elif label_type == 'regr':
        header = pjoin(os.path.dirname(__file__),
                       'data', 'asas_training_subset_targets.dat')
    tarball = pjoin(os.path.dirname(__file__),
                    'data', 'asas_training_subset.tar.gz')
    header = shutil.copy2(header, cfg['paths']['upload_folder'])
    tarball = shutil.copy2(tarball, cfg['paths']['upload_folder'])
    ts_paths = data_management.parse_and_store_ts_data(
        tarball, cfg['paths']['ts_data_folder'], header)
    d = m.Dataset.add(name='test_ds', project=project, file_uris=ts_paths)
    d.save()
    try:
        yield d
    finally:
        d.delete_instance()
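
This setup/teardown helper is a generator: the dataset exists only between the yield and the finally clause, so a caller has to drive it. A minimal sketch of one way to consume it (hypothetical test code, not part of cesium_web; a `project` fixture providing a `models.Project` instance is assumed to exist elsewhere in the suite):

import pytest

# Sketch only: expose the yield-based helper as a pytest fixture. The dataset
# is created before the test body runs and deleted afterwards by the finally
# block in create_test_dataset. `project` is assumed to be another fixture.
@pytest.fixture
def dataset(project):
    yield from create_test_dataset(project)


def test_dataset_name(dataset):
    assert dataset.name == 'test_ds'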
Code Example #2
def test_parsing_and_saving(tmpdir):
    data_file_path = pjoin(DATA_PATH, "215153_215176_218272_218934.tar.gz")
    header_path = pjoin(DATA_PATH, "215153_215176_218272_218934_metadata.dat")
    time_series = data_management.parse_and_store_ts_data(
        data_file_path, str(tmpdir), header_path, cleanup_archive=False,
        cleanup_header=False)
    for ts in time_series:
        assert isinstance(ts, str)
        assert os.path.exists(ts)

    time_series = data_management.parse_and_store_ts_data(
        data_file_path, str(tmpdir), None, cleanup_archive=False,
        cleanup_header=False)
    for ts in time_series:
        assert isinstance(ts, str)
        assert os.path.exists(ts)
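
This test also pins down the call signature: the first argument is the uploaded archive, the second the directory where the parsed time-series files are written, the third an optional header file (pass None for unlabeled data without metafeatures), and the cleanup flags presumably control whether the inputs are removed after parsing. Note that the return value differs across the snapshots collected here: the newer tests and handlers receive file paths, while the older ones (the flask_server.py handler and two of the tests below) receive TimeSeries objects directly. A standalone sketch under those assumptions, with placeholder paths:

# Sketch only, with placeholder paths; `data_management` and `time_series`
# are assumed to be the cesium package modules used throughout these examples.
from cesium import data_management, time_series

ts_paths = data_management.parse_and_store_ts_data(
    '/tmp/uploads/my_data.tar.gz',   # archive of raw time-series files
    '/tmp/ts_data',                  # directory for the parsed output files
    '/tmp/uploads/my_header.dat')    # optional header; may be None
ts = time_series.load(ts_paths[0])   # load one parsed series, as in the newer handlers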
Code Example #3
def test_parsing_and_saving():
    data_file_path = pjoin(DATA_PATH, "215153_215176_218272_218934.tar.gz")
    header_path = pjoin(DATA_PATH, "215153_215176_218272_218934_metadata.dat")
    time_series = data_management.parse_and_store_ts_data(data_file_path,
                      TEMP_DIR, header_path, cleanup_archive=False,
                      cleanup_header=False)
    for ts in time_series:
        assert all(f in ['meta1', 'meta2', 'meta3']
                   for f in ts.meta_features.keys())
        assert(len(ts.time) == len(ts.measurement)
               and len(ts.time) == len(ts.error))
Code Example #4
def test_parsing_and_saving():
    data_file_path = pjoin(DATA_PATH, "215153_215176_218272_218934.tar.gz")
    header_path = pjoin(DATA_PATH, "215153_215176_218272_218934_metadata.dat")
    time_series = data_management.parse_and_store_ts_data(
        data_file_path,
        TEMP_DIR,
        header_path,
        cleanup_archive=False,
        cleanup_header=False)
    for ts in time_series:
        assert isinstance(ts, str)
        assert os.path.exists(ts)
Code Example #5
File: dataset.py Project: weiwzhang/cesium_web
    def post(self):
        if not 'tarFile' in self.request.files:
            return self.error('No tar file uploaded')

        zipfile = self.request.files['tarFile'][0]

        if zipfile.filename == '':
            return self.error('Empty tar file uploaded')

        dataset_name = self.get_argument('datasetName')
        project_id = self.get_argument('projectID')

        zipfile_name = (str(uuid.uuid4()) + "_" +
                        util.secure_filename(zipfile.filename))
        zipfile_path = pjoin(cfg['paths']['upload_folder'], zipfile_name)

        with open(zipfile_path, 'wb') as f:
            f.write(zipfile['body'])

        # Header file is optional for unlabeled data w/o metafeatures
        if 'headerFile' in self.request.files:
            headerfile = self.request.files['headerFile'][0]
            headerfile_name = (str(uuid.uuid4()) + "_" +
                               util.secure_filename(headerfile.filename))
            headerfile_path = pjoin(cfg['paths']['upload_folder'], headerfile_name)

            with open(headerfile_path, 'wb') as f:
                f.write(headerfile['body'])

        else:
            headerfile_path = None

        p = Project.get(Project.id == project_id)
        # TODO this should give unique names to the time series files
        ts_paths = data_management.parse_and_store_ts_data(
            zipfile_path,
            cfg['paths']['ts_data_folder'],
            headerfile_path)
        meta_features = list(time_series.from_netcdf(ts_paths[0])
                             .meta_features.keys())
        unique_ts_paths = [os.path.join(os.path.dirname(ts_path),
                                        str(uuid.uuid4()) + "_" +
                                        util.secure_filename(ts_path))
                           for ts_path in ts_paths]
        for old_path, new_path in zip(ts_paths, unique_ts_paths):
            os.rename(old_path, new_path)
        file_names = [shorten_fname(ts_path) for ts_path in ts_paths]
        d = Dataset.add(name=dataset_name, project=p, file_names=file_names,
                        file_uris=unique_ts_paths, meta_features=meta_features)

        return self.success(d, 'cesium/FETCH_DATASETS')
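
This handler (and the two dataset.py handlers further down) prefixes every stored file with a fresh UUID so that repeated uploads of identically named files cannot collide. The pattern in isolation, with a hypothetical helper name that is not part of cesium_web:

import os
import uuid

def make_unique_path(path):
    """Illustrative helper: prefix the base name with a UUID so two uploads
    of the same file never overwrite each other."""
    dirname, fname = os.path.split(path)
    return os.path.join(dirname, '{}_{}'.format(uuid.uuid4(), fname))

# e.g. make_unique_path('/data/ts_data/obj1.npz')
# -> '/data/ts_data/3f2b6c1e-..._obj1.npz'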
Code Example #6
def test_parsing_and_saving():
    data_file_path = pjoin(DATA_PATH, "215153_215176_218272_218934.tar.gz")
    header_path = pjoin(DATA_PATH, "215153_215176_218272_218934_metadata.dat")
    time_series = data_management.parse_and_store_ts_data(
        data_file_path,
        TEMP_DIR,
        header_path,
        cleanup_archive=False,
        cleanup_header=False)
    for ts in time_series:
        assert all(f in ['meta1', 'meta2', 'meta3']
                   for f in ts.meta_features.keys())
        assert (len(ts.time) == len(ts.measurement)
                and len(ts.time) == len(ts.error))
Code Example #7
    def add_files(dataset, create, value, *args, **kwargs):
        if not create:
            return

        if 'class' in dataset.name:
            header = pjoin(os.path.dirname(__file__), 'data',
                           'asas_training_subset_classes.dat')
        elif 'regr' in dataset.name:
            header = pjoin(os.path.dirname(__file__), 'data',
                           'asas_training_subset_targets.dat')
        else:
            header = None
        tarball = pjoin(os.path.dirname(__file__), 'data',
                        'asas_training_subset.tar.gz')
        header = shutil.copy2(header, TMP_DIR) if header else None
        tarball = shutil.copy2(tarball, TMP_DIR)
        ts_paths = data_management.parse_and_store_ts_data(
            tarball, TMP_DIR, header)

        dataset.files = [DatasetFile(uri=uri) for uri in ts_paths]
        DBSession().commit()
Code Example #8
File: flask_server.py Project: stefanv/cesium_web
def Dataset(dataset_id=None):
    """
    """
    if dataset_id:
        d = m.Dataset.get(m.Dataset.id == dataset_id)
        if not d.project.is_owned_by(get_username()):
            raise error('Unauthorized access')

    if request.method == 'POST':
        form = request.form

        if not 'headerFile' in request.files:
            return error('No header file uploaded')

        if not 'tarFile' in request.files:
            return error('No tar file uploaded')

        headerfile = request.files['headerFile']
        zipfile = request.files['tarFile']

        if zipfile.filename == '':
            return error('Empty tar file uploaded')

        if headerfile.filename == '':
            return error('Empty header file uploaded')

        dataset_name = form['datasetName']
        project_id = form['projectID']

        # files have the following attributes:
        #
        # 'close', 'content_length', 'content_type', 'filename', 'headers',
        # 'mimetype', 'mimetype_params', 'name', 'save', 'stream'

        # Create unique file names
        headerfile_name = (str(uuid.uuid4()) + "_" +
                           str(secure_filename(headerfile.filename)))
        zipfile_name = (str(uuid.uuid4()) + "_" +
                        str(secure_filename(zipfile.filename)))
        headerfile_path = pjoin(cfg['paths']['upload_folder'], headerfile_name)
        zipfile_path = pjoin(cfg['paths']['upload_folder'], zipfile_name)
        headerfile.save(headerfile_path)
        zipfile.save(zipfile_path)

        p = m.Project.get(m.Project.id == project_id)
        time_series = data_management.parse_and_store_ts_data(
            zipfile_path,
            cfg['paths']['ts_data_folder'],
            headerfile_path)
        ts_paths = [ts.path for ts in time_series]
        d = m.Dataset.add(name=dataset_name, project=p, file_uris=ts_paths)

        return success(d, 'cesium/FETCH_DATASETS')

    elif request.method == "GET":
        if dataset_id is None:
            datasets = [d for p in m.Project.all(get_username())
                            for d in p.datasets]
        else:
            datasets = d

        return success(datasets)

    elif request.method == "DELETE":
        if dataset_id is None:
            raise error('No dataset specified')

        d.delete_instance()

        return success(action='cesium/FETCH_DATASETS')

    elif request.method == "PUT":
        if dataset_id is None:
            raise error('No dataset specified')

        return error('Dataset updating not yet implemented')
Code Example #9
File: dataset.py Project: acrellin/cesium_web
    def post(self):
        data = self.get_json()
        if not 'tarFile' in data:
            return self.error('No tar file uploaded')

        zipfile = data['tarFile']
        tarball_content_types = ('data:application/gzip;base64',
                                 'data:application/x-gzip;base64')

        if not zipfile['body'].startswith(tarball_content_types):
            return self.error('Invalid tar file - please ensure file is gzip '
                              'format.')

        if zipfile['name'] == '':
            return self.error('Empty tar file uploaded')

        dataset_name = data['datasetName']
        project_id = data['projectID']

        zipfile_name = (str(uuid.uuid4()) + "_" +
                        util.secure_filename(zipfile['name']))
        zipfile_path = pjoin(self.cfg['paths:upload_folder'], zipfile_name)

        for prefix in tarball_content_types:
            zipfile['body'] = zipfile['body'].replace(prefix, '')
        with open(zipfile_path, 'wb') as f:
            f.write(base64.b64decode(zipfile['body']))
        try:
            tarfile.open(zipfile_path)
        except tarfile.ReadError:
            os.remove(zipfile_path)
            return self.error('Invalid tar file - please ensure file is gzip '
                              'format.')

        # Header file is optional for unlabeled data w/o metafeatures
        if 'headerFile' in data:
            headerfile = data['headerFile']
            headerfile_name = (str(uuid.uuid4()) + "_" +
                               util.secure_filename(headerfile['name']))
            headerfile_path = pjoin(self.cfg['paths:upload_folder'], headerfile_name)

            with open(headerfile_path, 'w') as f:
                f.write(headerfile['body'])

        else:
            headerfile_path = None

        p = Project.query.filter(Project.id == project_id).one()
        ts_paths = data_management.parse_and_store_ts_data(
            zipfile_path,
            self.cfg['paths:ts_data_folder'],
            headerfile_path)
        meta_features = list(time_series.load(ts_paths[0]).meta_features.keys())
        unique_ts_paths = [os.path.join(os.path.dirname(ts_path),
                                        str(uuid.uuid4()) + "_" +
                                        util.secure_filename(ts_path))
                           for ts_path in ts_paths]
        d = Dataset(name=dataset_name, project=p, meta_features=meta_features)
        for old_path, new_path in zip(ts_paths, unique_ts_paths):
            os.rename(old_path, new_path)
            d.files.append(DatasetFile(name=shorten_fname(old_path),
                                       uri=new_path))
        DBSession().add(d)
        DBSession().commit()

        print(f"\n\n Just added a new dataset ({d}): here's DB contents at this point:",
              list(Dataset.query))

        return self.success(d.display_info(), 'cesium/FETCH_DATASETS')
Code Example #10
File: dataset.py Project: tymiles003/cesium_web
    def post(self):
        data = self.get_json()
        if not 'tarFile' in data:
            return self.error('No tar file uploaded')

        zipfile = data['tarFile']
        tarball_content_types = ('data:application/gzip;base64',
                                 'data:application/x-gzip;base64')

        if not zipfile['body'].startswith(tarball_content_types):
            return self.error('Invalid tar file - please ensure file is gzip '
                              'format.')

        if zipfile['name'] == '':
            return self.error('Empty tar file uploaded')

        dataset_name = data['datasetName']
        project_id = data['projectID']

        zipfile_name = (str(uuid.uuid4()) + "_" +
                        util.secure_filename(zipfile['name']))
        zipfile_path = pjoin(self.cfg['paths:upload_folder'], zipfile_name)

        for prefix in tarball_content_types:
            zipfile['body'] = zipfile['body'].replace(prefix, '')
        with open(zipfile_path, 'wb') as f:
            f.write(base64.b64decode(zipfile['body']))
        try:
            tarfile.open(zipfile_path)
        except tarfile.ReadError:
            os.remove(zipfile_path)
            return self.error('Invalid tar file - please ensure file is gzip '
                              'format.')

        # Header file is optional for unlabeled data w/o metafeatures
        if 'headerFile' in data:
            headerfile = data['headerFile']
            headerfile_name = (str(uuid.uuid4()) + "_" +
                               util.secure_filename(headerfile['name']))
            headerfile_path = pjoin(self.cfg['paths:upload_folder'],
                                    headerfile_name)

            with open(headerfile_path, 'w') as f:
                f.write(headerfile['body'])

        else:
            headerfile_path = None

        p = Project.query.filter(Project.id == project_id).one()
        ts_paths = data_management.parse_and_store_ts_data(
            zipfile_path, self.cfg['paths:ts_data_folder'], headerfile_path)
        meta_features = list(
            time_series.load(ts_paths[0]).meta_features.keys())
        unique_ts_paths = [
            os.path.join(
                os.path.dirname(ts_path),
                str(uuid.uuid4()) + "_" + util.secure_filename(ts_path))
            for ts_path in ts_paths
        ]
        d = Dataset(name=dataset_name, project=p, meta_features=meta_features)
        for old_path, new_path in zip(ts_paths, unique_ts_paths):
            os.rename(old_path, new_path)
            d.files.append(
                DatasetFile(name=shorten_fname(old_path), uri=new_path))
        DBSession().add(d)
        DBSession().commit()

        return self.success(d.display_info(), 'cesium/FETCH_DATASETS')
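
The two dataset.py handlers above receive the tarball as a base64-encoded data URI and only accept it once tarfile can actually read it. The same logic distilled into a standalone function (hypothetical name; it mirrors the prefix list and error message used above):

import base64
import os
import tarfile

TARBALL_PREFIXES = ('data:application/gzip;base64',
                    'data:application/x-gzip;base64')

def decode_and_check_tarball(body, out_path):
    """Sketch of the pattern above: strip the data-URI prefix, base64-decode
    the payload to disk, and verify the result is a readable tar archive."""
    for prefix in TARBALL_PREFIXES:
        body = body.replace(prefix, '')
    with open(out_path, 'wb') as f:
        f.write(base64.b64decode(body))
    try:
        with tarfile.open(out_path):
            pass
    except tarfile.ReadError:
        os.remove(out_path)
        raise ValueError('Invalid tar file - please ensure file is gzip '
                         'format.')
    return out_path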