def test_run_outlier_only(mk_tmp_dirs):
    """Test a basic run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.save_results=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]

    Step.from_cmdline(args)
    assert False  # XXX: unconditional failure; no product assertions are made
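
The copy-the-association-and-its-members setup above recurs in every calwebb_spec3 test in this listing. A minimal stand-alone sketch of that setup as a helper is shown below, assuming the association file is plain JSON, that file_copy is shutil.copy, and that json.load is an acceptable stand-in for load_asn:

def copy_asn_with_members(data_path, asn_name, member_subdir, tmp_data_path):
    """Copy an association file and every member exposure it lists.

    Hypothetical helper; the directory layout mirrors DATAPATH above.
    """
    import json
    from os import path
    from shutil import copy as file_copy

    file_copy(path.join(data_path, asn_name), tmp_data_path)
    asn_path = path.join(tmp_data_path, asn_name)

    with open(asn_path) as fp:
        asn = json.load(fp)  # stand-in for load_asn
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(data_path, member_subdir, member['expname']),
                tmp_data_path
            )
    return asn_path, asn
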
def test_run_full(mk_tmp_dirs):
    """Test a full run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
    ]

    Step.from_cmdline(args)
    assert False  # XXX: unconditional failure; no product assertions are made
def test_run_msaflagging(mk_tmp_dirs, caplog):
    """Test msa flagging operation"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy msa config files from DATAPATH to
    # current working directory
    file_copy(path.join(DATAPATH, 'jw95065006001_0_msa_twoslit.fits'), '.')

    asn_path = update_asn_basedir(path.join(
        DATAPATH, 'mos_udf_g235m_twoslit_spec2_asn.json'),
                                  root=path.join(DATAPATH, 'level2a_twoslit'))
    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'), asn_path,
        '--steps.msa_flagging.skip=false'
    ]

    Step.from_cmdline(args)

    assert 'Step msa_flagging running with args' in caplog.text
    assert 'Step msa_flagging done' in caplog.text

    with open(asn_path) as fp:
        asn = load_asn(fp)

    for product in asn['products']:
        prod_name = product['name'] + '_cal.fits'
        assert path.isfile(prod_name)
Example #6
def install_my_kernel_spec(user=False):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        path_of_file = dirname(abspath(__file__))
        file_copy(path_of_file + "/mikrokosmoskernel/kernel.js", td)
        file_copy(path_of_file + "/mikrokosmoskernel/kernel.json", td)
        print('Installing Jupyter kernel spec')
        install_kernel_spec(td, 'IMikrokosmos', user=user, replace=True)
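
Such installers are usually wired to a small command-line entry point. A minimal sketch that drives the function above is given below; the --user flag name is an assumption, not part of the original example:

def main(argv=None):
    # Hypothetical entry point for the installer above.
    import argparse
    parser = argparse.ArgumentParser(
        description='Install the Mikrokosmos Jupyter kernel spec.')
    parser.add_argument('--user', action='store_true',
                        help='install for the current user instead of system-wide')
    args = parser.parse_args(argv)
    install_my_kernel_spec(user=args.user)


if __name__ == '__main__':
    main()
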
Example #7
def upgrade_thread():
  global in_progress
  global file_count, manifest_checksum, manifest_size, manifest_url, version
  try:
    common.log("Firmware upgrade start")
    gc.collect()

    manifest_pathname = "manifest"
    shutil.path_remove(manifest_pathname)
# TODO: Remove all "*_new" and "*_old"

    aiko.web_client.http_get_file(manifest_url, manifest_pathname)
# TODO: Verify "manifest_pathname" actual file size versus "manifest_size"
# TODO: Verify "manifest_pathname" checksum

    top_level_files = []
    url_prefix = manifest_url.rpartition("/")[0]
    with open(manifest_pathname, "r") as manifest_file:
      file_index = 0
      for line in manifest_file.readlines():
        file_index += 1
        file_checksum, file_size, filepath = line.split()
        url_suffix = filepath.partition("/")[-1]
        file_url = "/".join([url_prefix, url_suffix])

        pathname = url_suffix.partition("/")
        if not pathname[0] in top_level_files:
          top_level_files.append(pathname[0])
        pathname = "".join([pathname[0] + "_new"] + list(pathname[1:]))

        print(file_url + " --> " + pathname)
        common.log("Firmware get ... %d of %d" % (file_index, file_count))
        aiko.web_client.http_get_file(file_url, pathname)
# TODO: Verify actual file size versus size stated in the "manifest"
# TODO: Verify actual file checksum

    shutil.path_remove(manifest_pathname)
    shutil.file_copy("configuration/net.py",  "configuration_new/net.py")
    shutil.file_copy("configuration/keys.db", "configuration_new/keys.db")

    common.log("Firmware install")
    for file in top_level_files:
      try:
        print("Rename %s to %s" % (file + "_new", file))
        shutil.path_remove(file)
        os.rename(file + "_new", file)
      except OSError:
        print("OSError")

    common.log("Firmware upgrade success !")
    common.log("Please reboot :)")
  except Exception as exception:
    common.log("Firmware upgrade failed :(")
    import sys
    sys.print_exception(exception)
  finally:
    in_progress = False
    version = None
Example #8
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755) # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing IPython kernel spec')
        install_kernel_spec(td, 'Singular', user=user, replace=True)
Example #9
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        # TODO: Copy resources once they're specified
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing Jupyter kernel spec from')
        install_kernel_spec(td, 'gap', user=user, replace=True)
def test_run_extract_1d_resample_mock(mk_tmp_dirs):
    """Test only the extraction step. Should produce nothing
    because extraction requires resampling
    """
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_mock.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.skip=true',
        '--steps.cube_build.skip=true',
    ]

    Step.from_cmdline(args)

    # Though the calibration is not run, the conversion to
    # source-based naming has occurred. Check the products.
    with open(asn_path) as fd:
        asn = load_asn(fd)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_s2d.fits'
    assert len(glob(product_name_glob)) == 2

    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_x1d.fits'
    assert len(glob(product_name_glob)) == 2
Example #12
def test_msa_missing_nofail(mk_tmp_dirs, caplog):
    """Test MSA missing failure"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    input_file = 'F170LP-G235M_MOS_observation-6-c0e0_001_DN_NRS1_mod.fits'
    file_copy(path.join(DATAPATH, 'level2a_twoslit', input_file),
              tmp_data_path)
    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        path.join(tmp_data_path, input_file), '--fail_on_exception=false'
    ]

    Step.from_cmdline(args)

    assert 'Unable to open MSA FITS file (MSAMETFL)' in caplog.text
Example #13
def test_msa_missing_skip(mk_tmp_dirs, caplog):
    """Test MSA missing failure"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    input_file = 'F170LP-G235M_MOS_observation-6-c0e0_001_DN_NRS1_mod.fits'
    file_copy(path.join(DATAPATH, 'level2a_twoslit', input_file),
              tmp_data_path)
    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        path.join(tmp_data_path, input_file), '--steps.assign_wcs.skip=true'
    ]

    Step.from_cmdline(args)

    assert 'Aborting remaining processing for this exposure.' in caplog.text
def test_run_nosteps(mk_tmp_dirs):
    """Test where no steps execute"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.skip=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]

    Step.from_cmdline(args)

    # Check for the Source-based cal name.
    with open(asn_path) as fp:
        asn = load_asn(fp)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    # Check that no other products built
    files = glob('*s3d*')
    files.extend(glob('*s2d*'))
    files.extend(glob('*x1d*'))
    assert not files
def test_run_outlier_only_mock(mk_tmp_dirs):
    """Test a basic run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_mock.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]

    Step.from_cmdline(args)

    # Check for the Source-based cal name.
    with open(asn_path) as fp:
        asn = load_asn(fp)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    # Check for the outlier results
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_crj.fits'
    assert len(glob(product_name_glob)) == 2
Example #18
    def _store_output_data(
        data_object_path,
        data_directory,
        output_to_store,
        results_by_id,
    ):
        """Collects all of the simulation to store, and saves it into a directory
        whose path will be passed to the storage backend to process.

        Parameters
        ----------
        data_object_path: str
            The file path to serialize the data object to.
        data_directory: str
            The path of the directory to store ancillary data in.
        output_to_store: BaseStoredData
            An object which contains `ProtocolPath`s pointing to the
            data to store.
        results_by_id: dict of ProtocolPath and any
            The results of the protocols which formed the property
            estimation workflow.
        """

        makedirs(data_directory, exist_ok=True)

        for attribute_name in output_to_store.get_attributes(StorageAttribute):

            attribute = getattr(output_to_store.__class__, attribute_name)
            attribute_value = getattr(output_to_store, attribute_name)

            if not isinstance(attribute_value, ProtocolPath):
                continue

            attribute_value = results_by_id[attribute_value]

            if issubclass(attribute.type_hint, FilePath):
                file_copy(attribute_value, data_directory)
                attribute_value = path.basename(attribute_value)

            setattr(output_to_store, attribute_name, attribute_value)

        with open(data_object_path, "w") as file:
            json.dump(output_to_store, file, cls=TypedJSONEncoder)
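
Stripped of the evaluator-specific types, the copy-then-record-basename pattern used here can be sketched with the standard library alone; all names below are illustrative and not part of the framework:

import json
from os import makedirs, path
from shutil import copy as file_copy


def store_outputs(data_object_path, data_directory, outputs):
    """Copy file-valued outputs into data_directory, then serialize the rest.

    `outputs` is a plain dict; values that are existing file paths are copied
    and replaced by their basename, mirroring the FilePath handling above.
    """
    makedirs(data_directory, exist_ok=True)

    stored = {}
    for name, value in outputs.items():
        if isinstance(value, str) and path.isfile(value):
            file_copy(value, data_directory)
            value = path.basename(value)
        stored[name] = value

    with open(data_object_path, "w") as file:
        json.dump(stored, file)
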
Example #19
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        filenames = ["Detector.js", "three.js", "kernel.js"]
        filenames_renderer = ["CanvasRenderer.js", "Projector.js"]
        filenames_control = ["TrackballControls.js"]
        for i in filenames:
            file_copy(path_of_file + i, td)
        # Create sub-directories inside the temp dir with user-readable permissions
        os.mkdir(os.path.join(td, "renderers"), mode=0o755)
        for i in filenames_renderer:
            file_copy(path_of_file + "renderers/" + i, os.path.join(td, "renderers"))
        os.mkdir(os.path.join(td, "controls"), mode=0o755)
        for i in filenames_control:
            file_copy(path_of_file + "controls/" + i, os.path.join(td, "controls"))
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing IPython kernel spec')
        install_kernel_spec(td, 'polymake', user=user, replace=True)
Example #20
def test_run_msaflagging(mk_tmp_dirs, caplog):
    """Test msa flagging operation"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(path.join(DATAPATH, 'jw95065006001_0_msa_twoslit.fits'),
              tmp_data_path)
    file_copy(path.join(DATAPATH, 'mos_udf_g235m_twoslit_spec2_asn.json'),
              tmp_data_path)
    asn_path = path.join(tmp_data_path, 'mos_udf_g235m_twoslit_spec2_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2a_twoslit', member['expname']),
                tmp_data_path)

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'), asn_path,
        '--steps.msa_flagging.skip=false'
    ]

    Step.from_cmdline(args)

    assert 'Step msa_flagging running with args' in caplog.text
    assert 'Step msa_flagging done' in caplog.text

    for product in asn['products']:
        prod_name = product['name'] + '_cal.fits'
        assert path.isfile(prod_name)
Example #23
def add_song(file, artist, song_name, date, *tags):
    try:
        fileDetails = file.split(".")
        file_name = fileDetails[0]
        type = fileDetails[1].lower()
        id = str(uuid4())

        if type not in accepted_types:
            raise Exception("Unrecognized file format!")

        # Format params
        artist = artist.replace("_", " ")
        song_name = song_name.replace("_", " ")

        # Check date format: DD.MM.YYYY, DD/MM/YYYY or DD-MM-YYYY
        if match(r'(\d{2})[./-](\d{2})[./-](\d{4})$', date) is None:
            raise Exception("Invalid Date")

        # Copy file and rename it to ID.type
        file_copy(file, "Storage")
        os.rename(f"Storage/{file}", f"Storage/{id}.{type}")

        data = {
            "ID": id,
            "file name": file_name,
            "type": type,
            "artist": artist,
            "song name": song_name,
            "date": date,
            "tags": tags
        }
        db.insert(data)

        return f"Success! Song id: {id}"

    except Exception as error:
        return f"Failure! {error}"
Example #24
    def run(self):
        step = 0
        size = len(self.data)
        while step < size:
            print('.', end='')
            # Strip the newline characters
            path = self.data[step].replace('\n', '')
            filename = path.split('\\')[-1]

            try:
                npath = os.path.join(self.folder, filename)
                file_copy(path, npath)
                self.gauge.step()
                self.blist.insert(tk.END, "[%s] %s" % (str(step + 1).zfill(3), filename))

            except Exception:
                time = datetime.datetime.now().strftime("[%d-%m-%Y %H:%M:%S]")
                text = "%s - ERROR COPYING: %s \n" % (time, path)
                self.log.write(text)
                self.blist.insert(tk.END, "[%s] ERROR %s" % (str(step + 1).zfill(3), filename))
            # time.sleep(10)  # sleep the thread
            step += 1

        self.is_run = False
Example #25
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(
            abspath(__file__)) + "/jupyter_kernel_polymake/resources/"
        filenames = [
            "three.js", "Detector.js", "controls/TrackballControls.js",
            "renderers/SVGRenderer.js", "renderers/CanvasRenderer.js",
            "renderers/Projector.js", "menu.svg", "close.svg"
        ]
        for i in filenames:
            file_copy(path_of_file + i, td)
        file_copy(path_of_file + "kernel.js", td)
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing jupyter kernel spec for polymake')
        install_kernel_spec(td, 'polymake', user=user, replace=True)
Example #27
def tag_learn_test(**kwargs):  # 90201
    # tag_files args:
    input_path: str = kwargs['input_parses']
    output_path: str = kwargs['output_grammar']  # ['out_path'] ?
    corpus: str = kwargs['output_grammar']
    cp: str

    log = {}

    if 'input_grammar' not in kwargs:
        rulez, re01 = learn(**kwargs)
        log.update(re01)
        new_dict_path = re01['grammar_file']

    else:  # tag and learn
        # print('else: tag and learn')

        #?kwargs['out_path'] = kwargs['output_grammar'] # used in tag_files only
        # if 'out_path' in kwargs:
        #   out_path : str = kwargs['out_path']
        #   del kwargs['out_path']  # tag_files uses kwargs['output_grammar'] instead
        key_dict_path: str = kwargs['input_grammar']  # dict for tagging
        re02 = tag_files(**kwargs)
        log.update(re02)

        #-kwargs['input_parses'] = re1['tagger_output_path'] + '/tagged_ull'
        kwargs['input_parses'] = output_path + '/tagged_ull'
        check_dir(kwargs['input_parses'], False, 'max')

        #-kwargs['output_grammar'] = kwargs['out_path']
        rulez, re03 = learn(**kwargs)  # rulez: dict FIXME: return
        log.update(re03)

        # Decode .dict:
        new_dict_path = re03['grammar_file']
        with open(new_dict_path, 'r') as f:
            d: list = f.read().splitlines()  # TODO? split at dict2list?
        tagged_dict_path = file_copy(new_dict_path, new_dict_path + '.tagged')

        with open(key_dict_path, 'r') as f:
            kd: list = f.read().splitlines()  # TODO? split at dict2list?
        clusters: dict = dict2lists(kd, **kwargs)
        with open(new_dict_path, 'w') as f:
            f.write(decode_dict(d, clusters))
        # TODO: single def to decode dict, input -- 2*strings:
        # with open(key_dict_path, 'r') as f: kd = f.read()  # string
        # with open(new_dict_path, 'r') as f: d = f.read()  # string
        # decoded_dict: str = decode_dict_new(d, kd)
        # decoded

        #-check:
        #-with open(new_dict_path, 'r') as f: tmp = f.read().splitlines()
        #-print(tmp[-7:])

        # TODO: decode cat_tree.txt
        cat_tree_file = re03['cat_tree_file']
        with open(cat_tree_file, 'r') as f:
            tree = f.read()
        tagged_cat_tree_path = file_copy(cat_tree_file,
                                         cat_tree_file + '.tagged')
        with open(cat_tree_file, 'w') as f:
            f.write(decode_cat_tree(tree, kd, **kwargs))

    # TODO: Test Grammar with decoded .dict
    # pa, f1, p, pq: parse-ability, F-measure, precision, parse quality
    pa, f1, p, pq = pqa_meter(new_dict_path, '', '', '', **kwargs)
    # op,cp,rp = '' » use kwargs['out_path'], corpus_path, reference_path
    # TODO: log.update(a, f1, p, q)
    # print('pa, f1, p, pq:', pa, f1, p, pq)
    # TODO: replace pqa_meter with a local function: re = pqa(**kwargs)

    # TODO: decode & return rulez? return .dict converted to a string?
    # TODO: return line []?
    return log['grammar_rules'], pa, f1, log  # rulez, log
Example #28
    def _store_output_data(
        data_object_path,
        data_directory,
        output_to_store,
        results_by_id,
    ):
        """Collects all of the simulation to store, and saves it into a directory
        whose path will be passed to the storage backend to process.

        Parameters
        ----------
        data_object_path: str
            The file path to serialize the data object to.
        data_directory: str
            The path of the directory to store ancillary data in.
        output_to_store: BaseStoredData
            An object which contains `ProtocolPath`s pointing to the
            data to store.
        results_by_id: dict of ProtocolPath and any
            The results of the protocols which formed the property
            estimation workflow.
        """

        makedirs(data_directory, exist_ok=True)

        for attribute_name in output_to_store.get_attributes(StorageAttribute):

            attribute = getattr(output_to_store.__class__, attribute_name)
            attribute_value = getattr(output_to_store, attribute_name)

            if isinstance(attribute_value, ProtocolPath):

                # Strip any nested attribute accessors before retrieving the result
                property_name = attribute_value.property_name.split(
                    ".")[0].split("[")[0]

                result_path = ProtocolPath(property_name,
                                           *attribute_value.protocol_ids)
                result = results_by_id[result_path]

                if result_path != attribute_value:

                    result = get_nested_attribute(
                        {property_name: result}, attribute_value.property_name)

                attribute_value = result

                # Do not store gradient information for observables as this information
                # is very workflow / context specific.
                if isinstance(attribute_value,
                              (Observable, ObservableArray, ObservableFrame)):
                    attribute_value.clear_gradients()

            if issubclass(attribute.type_hint, FilePath):
                file_copy(attribute_value, data_directory)
                attribute_value = path.basename(attribute_value)

            setattr(output_to_store, attribute_name, attribute_value)

        with open(data_object_path, "w") as file:
            json.dump(output_to_store, file, cls=TypedJSONEncoder)
    def install_by_version(cls, gadgets, kata_runtime_type,
                           http_proxy=None, https_proxy=None, no_proxy=None, verbose=False):
        """Install Kata-containers with specified version.

        Args:
            gadgets: Kata-containers gadgets (e.g. kata-containers).
            kata_runtime_type: Runtime of Kata (e.g. qemu/clh/...).
            http_proxy: HTTP proxy.
            https_proxy: HTTPS proxy.
            no_proxy: Domains which should be visited without proxy.
            verbose: Verbose or not.

        Returns:
            Boolean indicating whether Kata-containers is successfully installed or not.
        """
        stdout, stderr = verbose_func.verbose_output(verbose)

        kata_static_tar_file = config.kata_static_tar_file % gadgets[0]['version']
        kata_static_save_path = config.runtime_data_dir + kata_static_tar_file
        kata_static_tar = Path(kata_static_save_path)

        # 1. download kata tar if necessary
        if not kata_static_tar.exists():
            color_print.debug(
                '{kata_tar} is going to be downloaded'.format(
                    kata_tar=kata_static_tar_file))
            kata_static_url = (
                config.kata_static_url_prefix %
                gadgets[0]['version']) + kata_static_tar_file
            proxies = {
                'http': http_proxy,
                'https': https_proxy,
                'no_proxy': no_proxy,
            }
            cls.download_file(
                url=kata_static_url,
                save_path=kata_static_save_path,
                proxies=proxies)
        else:
            color_print.debug(
                '{kata_tar} has been downloaded'.format(
                    kata_tar=kata_static_tar_file))

        # 2. decompress
        color_print.debug(
            'decompressing files into {dest}'.format(
                dest=config.kata_tar_decompress_dest))
        rmtree(path=config.kata_tar_decompress_dest, ignore_errors=True)
        system_func.mkdir_if_not_exist(config.kata_tar_decompress_dest)
        # use --strip-components=3 because `opt/kata/` path from tar are not needed
        # also, we should not just decompress files into `/` root path
        # which might cause risks
        temp_cmd = 'tar xf {file} -C {dest} --strip-components=3'.format(
            file=kata_static_save_path, dest=config.kata_tar_decompress_dest)
        try:
            subprocess.run(
                temp_cmd.split(),
                stdout=stdout,
                stderr=stderr,
                check=True)
        except subprocess.CalledProcessError:
            color_print.error(
                'failed to decompress {kata_tar}'.format(
                    kata_tar=kata_static_tar_file))
            return False

        # 3. copy files
        color_print.debug(
            'copying files to {kata_config_dir}'.format(
                kata_config_dir=config.kata_config_dir))
        rmtree(path=config.kata_config_dir, ignore_errors=True)
        system_func.mkdir_if_not_exist(config.kata_config_dir)
        for file in glob.glob(
                config.kata_tar_decompress_dest + 'share/defaults/kata-containers/*'):
            file_copy(
                src=file,
                dst=config.kata_config_dir,
                follow_symlinks=False)

        # 4. configure runtime type
        color_print.debug(
            'configuring kata runtime (type: {runtime_type})'.format(
                runtime_type=kata_runtime_type))
        kata_configuration_file = Path(
            '{kata_config_dir}/configuration.toml'.format(kata_config_dir=config.kata_config_dir))
        if kata_configuration_file.exists():
            kata_configuration_file.unlink()
        kata_configuration_file.symlink_to(
            '{kata_config_dir}/configuration-{runtime_type}.toml'.format(
                kata_config_dir=config.kata_config_dir,
                runtime_type=kata_runtime_type))

        # [5]. if docker is installed,
        # modify docker's configuration and restart docker
        # currently, metarget only supports docker
        # in the future more CRIs will be supported
        # see
        # https://github.com/kata-containers/documentation/blob/master/how-to/run-kata-with-k8s.md
        color_print.debug('configuring docker with kata-containers')
        if not cls._configure_docker_with_kata(
                base_dir=config.kata_tar_decompress_dest):
            color_print.error(
                'failed to configure docker with kata-containers')
            return False
        return cls.reload_and_restart_docker(verbose=verbose)
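
The decompress-and-symlink portion of this installer (steps 2 and 4) reduces to a tar invocation plus a pathlib symlink. A stand-alone sketch with made-up paths, not the metarget config values, is:

import subprocess
from pathlib import Path
from shutil import rmtree


def unpack_and_select_runtime(tarball, dest_dir, config_dir, runtime_type):
    """Unpack a kata-static tarball and point configuration.toml at the chosen
    runtime configuration. All paths here are illustrative."""
    dest = Path(dest_dir)
    rmtree(dest, ignore_errors=True)
    dest.mkdir(parents=True, exist_ok=True)

    # Drop the leading opt/kata/ components instead of extracting into /.
    subprocess.run(
        ['tar', 'xf', str(tarball), '-C', str(dest), '--strip-components=3'],
        check=True)

    config_dir = Path(config_dir)
    config_dir.mkdir(parents=True, exist_ok=True)
    link = config_dir / 'configuration.toml'
    if link.is_symlink() or link.exists():
        link.unlink()
    link.symlink_to(config_dir / ('configuration-%s.toml' % runtime_type))
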
Example #30
def generate_train_test_set_helper(sample_dir,
                                   target_dir,
                                   cross_val_num=5,
                                   seed=42,
                                   has_just_feature_matrix=False,
                                   dataset_name=""):
    sample_dir_path = Path(sample_dir)
    target_dir_path = Path(target_dir)

    if not dataset_name:
        dataset_name = sample_dir_path.stem

    # guess class label file
    class_label_file = MccImsAnalysis.guess_class_label_extension(
        sample_dir_path)
    class_labels = MccImsAnalysis.parse_class_labels(class_label_file)

    available_raw_files = sorted(sample_dir_path.glob("*_ims.csv"))
    available_preprocessed_files = sorted(
        sample_dir_path.glob("*_ims_preprocessed.csv"))
    available_pdrs = sorted(
        sample_dir_path.glob("*_peak_detection_result.csv"))

    # Make sure we know which files are missing so we can fetch them (or edit
    # the class labels). Extra files are not a problem; a subset is allowed.
    not_available_raw_files = []
    if available_raw_files:
        raw_files = []
        for arw in available_raw_files:
            raw_files.append(arw.name)
        raw_file_set = set(raw_files)

        for fn in class_labels.keys():
            if fn not in raw_file_set:
                not_available_raw_files.append(fn)
        print(f"Missing raw files: {not_available_raw_files}")

    not_available_preproc_files = []
    if available_preprocessed_files:
        preproc_files = []
        for apf in available_preprocessed_files:
            raw_name = apf.name.split("_ims.csv_")[0]
            raw_name += "_ims.csv"
            preproc_files.append(raw_name)
        preproc_file_set = set(preproc_files)
        for fn in class_labels:
            if fn not in preproc_file_set:
                not_available_preproc_files.append(
                    f"{fn[:-4]}_preprocessed.csv")
        print(f"Missing preprocessed files: {not_available_preproc_files}")

    # Approximate search; don't want to spell out all pdr_names
    not_available_pdr_files = []
    if available_pdrs:
        av_pdrs = []
        for apdr in available_pdrs:
            raw_name = apdr.name.split("_ims.csv_")[0]
            raw_name += "_ims.csv"
            av_pdrs.append(raw_name)

        av_pdr_set = set(av_pdrs)
        for fn in class_labels:
            if fn not in av_pdr_set:
                not_available_pdr_files.append(
                    f"{fn[:-4]}_peak_detection_result.csv")
        print(f"Missing peak detection result: {not_available_pdr_files}")

    if not_available_raw_files or not_available_preproc_files or not_available_pdr_files:
        raise ValueError(
            "Class labels needs to be adjusted or missing files added.")

    # check if we have a layer_file
    potential_layers = [
        str(filename) for filename in sample_dir_path.glob("*")
        if (str.endswith(str(filename), "layer.csv")
            or str.endswith(str(filename), "layer.xls"))
    ]

    print(
        f"Preparing dataset for {Counter(class_labels.values())} using {cross_val_num}-fold cross validation splits."
    )

    X = [k for k in class_labels.keys()]
    y = [v for v in class_labels.values()]
    # class_labels[m.filename]

    test_fraction = 1. / cross_val_num
    train_df, test_df = split_labels_ratio(class_labels,
                                           train_val_fraction=1 -
                                           test_fraction,
                                           seed=seed)

    train_dir = str(target_dir_path) + "/" + f"train_{dataset_name}/"
    test_dir = str(target_dir_path) + "/" + f"test_{dataset_name}/"

    print(f"Deleting {train_dir} and {test_dir}")
    # Delete train and test dirs if they already exist
    if Path(train_dir).exists():
        rmtree(train_dir, ignore_errors=True)
    if Path(test_dir).exists():
        rmtree(test_dir, ignore_errors=True)

    # TODO also remove existing results, such as peak_detection_results, feature matrices
    print(f"Creating {train_dir} and {test_dir}")

    Path(train_dir).mkdir(parents=True)
    Path(test_dir).mkdir(parents=True)

    tr_class_label_fn = Path(train_dir) / Path(class_label_file).name
    te_class_label_fn = Path(test_dir) / Path(class_label_file).name

    train_df[['name', 'label']].to_csv(tr_class_label_fn, sep=",", index=False)
    test_df[['name', 'label']].to_csv(te_class_label_fn, sep=",", index=False)

    # check if it has peak detection results
    pdrs = sorted(sample_dir_path.glob("*_peak_detection_result.csv"))

    # distribute into train and test list
    train_name_set = set(train_df['name'].values)
    test_name_set = set(test_df['name'].values)

    cannot_copy = []
    for pdr in pdrs:
        raw_fn_pre = pdr.name.split("_ims.csv")[0]
        raw_fn = raw_fn_pre + "_ims.csv"

        new_fn = ""
        if raw_fn in train_name_set:
            new_fn = Path(train_dir) / pdr.name
        elif raw_fn in test_name_set:
            new_fn = Path(test_dir) / pdr.name
        else:
            cannot_copy.append(pdr)

        # copy to destination
        if new_fn:
            file_copy(pdr, new_fn)

    if cannot_copy:
        print(f"{len(cannot_copy)} PDRs not in either index.",
              f"{cannot_copy}")

    if has_just_feature_matrix:
        # write feature matrix
        potential_feature_matrices = sample_dir_path.glob(
            "*_feature_matrix.csv")
        for fn in potential_feature_matrices:

            try:
                fm = pd.read_csv(fn, index_col=0)
                tr_fm = fm.loc[fm.index.intersection(train_df['name'])]
                te_fm = fm.loc[fm.index.intersection(test_df['name'])]

                tr_fm_fn = Path(train_dir) / "train_feature_matrix.csv"
                te_fm_fn = Path(test_dir) / "test_feature_matrix.csv"

                tr_fm.to_csv(tr_fm_fn)
                te_fm.to_csv(te_fm_fn)
                print(f"Created feature matrices {tr_fm_fn} and {te_fm_fn}")

                # also implement for other branches - pdr and preprocessed
                for t_dir, t_fm, t_cl, in zip(
                    [train_dir, test_dir], [tr_fm_fn, te_fm_fn],
                    [tr_class_label_fn, te_class_label_fn]):
                    t_dir_path = Path(t_dir)
                    t_dir_name = t_dir_path.stem

                    zip_path_tr = t_dir_path / f"{t_dir_name}.zip"
                    with ZipFile(zip_path_tr, 'w', ZIP_DEFLATED) as trzip:
                        trzip.write(
                            t_fm, t_fm.name
                        )  # needs to exist as a file on disk to write into the zip
                        trzip.write(t_cl, t_cl.name)

            except ValueError:
                # go until no more potential candidates, which should be just one anyway
                pass

    else:
        # Copy files to target dirs. This only works if the raw files are actually
        # there, which is not always the case, e.g. if there are only results.
        raw_files_not_copied = []
        for fn in train_df.name:
            file_path = Path(sample_dir_path) / fn
            new_path = Path(train_dir) / fn
            if file_path.exists():
                file_copy(file_path, dst=new_path)
            else:
                raw_files_not_copied.append(file_path)
        # same for test set
        for fn in test_df.name:
            file_path = Path(sample_dir_path) / fn
            new_path = Path(test_dir) / fn
            if file_path.exists():
                file_copy(file_path, dst=new_path)
            else:
                raw_files_not_copied.append(file_path)
        if raw_files_not_copied:
            print(
                f"Didn't copy {len(raw_files_not_copied)} raw files - as not found in source directory."
            )

        # also consider featureXML files
        feature_xmls_fns = filter_feature_xmls(sample_dir_path)
        cannot_copy = []
        for feature_xml_fn_ in feature_xmls_fns:
            feature_xml_fn = Path(feature_xml_fn_)
            # split_fn - so we can compare with class labels
            # need to get file ending of original raw file to match with class labels
            # could be MZML or MZXML
            raw_fn_pre = feature_xml_fn.name.split("ML_output.featureXML")[0]
            raw_fn = raw_fn_pre + "ML"

            new_fn = ""
            if raw_fn in train_name_set:
                new_fn = Path(train_dir) / feature_xml_fn.name
            elif raw_fn in test_name_set:
                new_fn = Path(test_dir) / feature_xml_fn.name
            else:
                cannot_copy.append(feature_xml_fn)

            # copy to destination
            if new_fn:
                file_copy(feature_xml_fn, new_fn)

        if feature_xmls_fns:
            print(
                f"Copied {len(feature_xmls_fns) - len(cannot_copy)}/{len(feature_xmls_fns)} featureXML files."
            )
        if cannot_copy:
            print(f"{len(cannot_copy)} featureXML not in either index.",
                  f"{cannot_copy}")

    # guess layer file and copy to target dir too
    if potential_layers:
        potential_layer_file = potential_layers[0]
        layer_name = Path(potential_layer_file).stem + Path(
            potential_layer_file).suffix
        file_copy(potential_layers[0], dst=str(train_dir) + "/" + layer_name)
        file_copy(potential_layers[0], dst=str(test_dir) + "/" + layer_name)

    print(f"{'|' * 40}\nFinished preparation of {dataset_name}\n")
    return train_df, test_df
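
A typical invocation of this helper, with illustrative paths rather than ones from the original project, would be:

# Hypothetical usage: split ./candy_samples into train/test directories
# under ./candy_splits using a 1/5 test fraction.
train_df, test_df = generate_train_test_set_helper(
    "candy_samples/", "candy_splits/", cross_val_num=5, seed=42,
    dataset_name="candy")
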
Example #31
def generate_full_candy_classes(plot_params, file_params, preprocessing_steps,
                                evaluation_params_dict):
    all_files = glob.glob(file_params['data_dir'] + "*_ims.csv")

    class_labels = {
        r[0]: r[1]
        for r in np.loadtxt(file_params['data_dir'] + "class_labels.csv",
                            delimiter=",",
                            dtype=str,
                            skiprows=1)
    }
    from collections import OrderedDict
    class_labels = OrderedDict(class_labels)
    analysis = MccImsAnalysis(
        [MccImsMeasurement(f) for f in all_files],
        preprocessing_steps, [],
        performance_measure_parameters=evaluation_params_dict,
        class_label_file=file_params['label_filename'],
        dataset_name='full_candy',
        dir_level="")

    for m in analysis.measurements:
        class_label = class_labels.get(m.filename)
        m.set_class_label(class_label)

    from sklearn.model_selection import train_test_split
    from shutil import copy as file_copy
    from shutil import rmtree
    from pathlib import Path

    X = [k for k in class_labels.keys()]
    y = [v for v in class_labels.values()]
    # class_labels[m.filename]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=42)
    train_df = pd.DataFrame({"name": X_train, "candy": y_train})
    test_df = pd.DataFrame({"name": X_test, "candy": y_test})
    train_dir = file_params['data_dir'] + "train_full_candy/"
    test_dir = file_params['data_dir'] + "test_full_candy/"

    # delete train and test dir
    if Path(train_dir).exists():
        rmtree(train_dir, ignore_errors=True)
    if Path(test_dir).exists():
        rmtree(test_dir, ignore_errors=True)

    # create directory
    Path(train_dir).mkdir()
    Path(test_dir).mkdir()

    # create class_label file
    train_df[['name', 'candy']].to_csv(train_dir + "class_labels.csv", sep=",")
    test_df[['name', 'candy']].to_csv(test_dir + "class_labels.csv", sep=",")

    # copy files to target dirs
    for fn in train_df.name:
        file_path = file_params['data_dir'] + fn
        new_path = train_dir + fn
        file_copy(file_path, dst=new_path)

    # same for test set
    for fn in test_df.name:
        file_path = file_params['data_dir'] + fn
        new_path = test_dir + fn
        file_copy(file_path, dst=new_path)
Example #32
def main():
    '''Main function to get a fresh scope.'''
    args = parse_arguments()
    # Setup functions for dry run if necessary, else start logging
    if args.dry:
        print('Performing dry run!')
        setup_dry_run()
    else:
        # Create root directory for all the results
        os.mkdir(args.outdir)
        # Setup logging
        logging.basicConfig(filename=os.path.join(args.outdir, 'stats.log'),
                            level=logging.INFO)
    # Models we are applying
    models, models2 = load_models(args.modeldir, args.powerset)
    if len(models) == 0 and not args.none:
        print('Warning, no model is being used! Use --none to force')
        return
    # Save some interesting data in global stats
    write_stats(args, models, args.powerset)

    logger_stats = logging.getLogger('stats')
    logger_other = logging.getLogger('other')

    # If provided, copy configuration file
    # TODO also, read values from the config file instead of using cli arguments
    if args.config is not None:
        cfg = os.path.join(args.outdir, 'configuration.ini')
        file_copy(args.config, cfg)
        args.config = cfg  # Replace old choice
        if not args.noask:
            print(
                f'This is the time to review your configuration file in {cfg}')
            print(
                f'Generation counts will be {args.shortg} (short) and {args.longg} (long)'
            )
            input(f'Press Enter when ready to go.')

        # Save config file to stats, for reference
        with zopen(cfg, 'rt') as cfgfp:
            global_stats['configuration.ini'] = cfgfp.read()

    # TODO
    # To analyze timing we need an average of the semantics over all runs.
    # Rather than retrieving the semantics at data-analysis time, it is better
    # to compute the average here and produce an averaged output file that is
    # easy to use in the analyses.

    for r in range(args.runs):
        print(f'Performing run {r}')
        # Prepare output directory for this run
        outdir = get_run_path(args.outdir, r)  # somepath/sim += /sim{r}
        os.mkdir(outdir)

        # Prepare dataset in somepath/sim/sim{r}/dataset
        # This single run will have the datafile partitioned in k folds
        dataset = Dataset(args.datafile, args.k_folds, outdir)
        cons, cons_msg = dataset.is_consistent()
        if not cons:
            # Emit a signal if K does not evenly partition the dataset
            print('Warning! Selected K cannot produce consistent semantics!')
            print(cons_msg)
            logi('run.dataset', cons_msg)
        dataset.generate_folds(True)

        # Model selection
        if args.all:
            bm = len(models2) - 1  # Last combination
            t_tot = 0  # No time spent
        elif args.none:
            bm = 0
            t_tot = 0  # No time spent
        else:
            if args.powerset:
                bm, t_tot = run_powerset(args, outdir, models2, dataset)
            else:
                bm, t_tot = run_set(args, outdir, models2, dataset)

        # Get actual
        best_models = models2[bm]

        # Save selection time
        global_stats['sel_time'] = global_stats.get('sel_time', 0) + t_tot
        global_stats.setdefault('sel_times', []).append(t_tot)
        logi('stats.selection.walltimes',
             f'Time for running selection: {t_tot}')
        # Increment best model usage
        global_stats['best_models'] = global_stats.get(
            'best_models', Counter()) + Counter({str(bm): 1})
        # Save combination
        global_stats.setdefault('bm_hist', []).append(bm)
        logi('stats.selection.models.best', f'{bm} {best_models}')

        print('Performing long run with best models', models2[bm])
        # Prepare simulation, storing data in somepath/sim/sim{r}/longrun
        forrest = Forrest(f'longrun', args.algorithm, models2[bm], dataset,
                          args.k_folds, outdir, args.bindir, args.config)

        # Run simulation
        k_fits, k_timing, avg_sem_train, avg_sem_test = forrest.run(args.longg)

        # Write average semantic data
        forrest.save_files(avg_sem_train, avg_sem_test)

        # Save logs and stats
        logi('stats.longrun.cv.fitness.average',
             f'Average CV: {row_average(k_fits)}')
        t_tot = sum(k_timing)  # Total time for executing K-fold CV
        logi('stats.longrun.walltimes', f'Total time for longruns: {t_tot}')
        global_stats['lon_time'] = global_stats.get('lon_time', 0) + t_tot
        global_stats.setdefault('lon_times', []).append(t_tot)

    logi('stats.selection.models.frequency', f'{global_stats["best_models"]}')

    with zopen(os.path.join(args.outdir, 'stats.json'), 'wt') as statfile:
        #for k, v in global_stats.items():
        #    print(f'Writing stat {k} = {v}')
        json.dump(global_stats, statfile, indent=4, sort_keys=True)