def create_output_dirs_files(settings, is_sub_gcm_delta=False):
    """
    Create the output directories and files needed to process GCM deltas.

    Parameters
    ----------
    settings : dictionary
        Dictionary of user settings

    Returns
    -------
    info_dir : string
        string path to info directory
    gcm_delta_dir : string
        string path to gcm delta directory
    info_file : string
        string path to info file

    Notes
    -----
    Uses settings set in user_settings.py
    """
    # create output directories
    info_dir = helpers.make_directory(
        path=settings["simulation_directory"],
        directory_name=settings["info_directory_name"])

    gcm_delta_dir = helpers.make_directory(
        path=settings["simulation_directory"],
        directory_name=settings["gcm_delta_directory_name"])

    # path to info file
    if is_sub_gcm_delta:
        info_file = os.path.join(info_dir, settings["sub_gcm_delta_info_file_name"])
    else:
        info_file = os.path.join(info_dir, settings["gcm_delta_info_file_name"])

    # print input and output information
    helpers.print_input_output_info(
        input_dict={
            "simulation_directory": settings["simulation_directory"],
            "gcm_delta_prepend_name": settings["gcm_delta_prepend_name"],
            "gcm_delta_directory_name": settings["gcm_delta_directory_name"],
            "gcm_delta_info_file_name": settings["gcm_delta_info_file_name"],
            "gcm_delta_non_intersecting_file_name": settings["gcm_delta_non_intersecting_file_name"],
            "sub_gcm_delta_info_file_name": settings["sub_gcm_delta_info_file_name"],
        },
        output_dict={
            "info_dir": info_dir,
            "info_file": info_file,
            "gcm_delta_dir": gcm_delta_dir
        })

    return info_dir, gcm_delta_dir, info_file
def process_water_files(file_list, settings, print_data=True):
    """
    Process a list of WATER output files (.txt or .xml) according to the user settings.

    Parameters
    ----------
    file_list : list
        List of files to parse, process, and plot.
    settings : dictionary
        Dictionary of user settings
    print_data : bool
        If True, print the parsed data to the console.
    """
    print("Processing WATER files ...\n")

    for f in file_list:

        ext = os.path.splitext(f)[1]
        assert ext == ".txt" or ext == ".xml", "Can not process file {}. File extension {} is not .txt or .xml".format(f, ext)

        filedir, filename = helpers.get_file_info(f)

        if ext == ".txt":
            output_dir = helpers.make_directory(path=filedir, directory_name=settings["watertxt_directory_name"])

            helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": output_dir})

            waterapputils_logging.initialize_loggers(output_dir=output_dir)

            data = watertxt.read_file(f)
            watertxt_viewer.plot_watertxt_data(data, save_path=output_dir)

            if print_data:
                watertxt_viewer.print_watertxt_data(data)

        elif ext == ".xml":
            output_dir = helpers.make_directory(path=filedir, directory_name=settings["waterxml_directory_name"])

            waterapputils_logging.initialize_loggers(output_dir=output_dir)

            helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": output_dir})

            data = waterxml.read_file(f)
            waterxml_viewer.plot_waterxml_timeseries_data(data, save_path=output_dir)
            waterxml_viewer.plot_waterxml_topographic_wetness_index_data(data, save_path=output_dir)

            if print_data:
                waterxml_viewer.print_waterxml_data(data)

        waterapputils_logging.remove_loggers()
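# Usage sketch (not part of the original module): how process_water_files() might be driven
# for one batch run. The paths and directory-name values below are hypothetical; they only
# assume that user_settings.py defines the same keys referenced above
# (watertxt_directory_name, waterxml_directory_name).
def _example_process_water_files():
    example_settings = {
        "watertxt_directory_name": "waterapputils-watertxt",    # placeholder value
        "waterxml_directory_name": "waterapputils-waterxml",    # placeholder value
    }
    example_files = [
        "simulations/basin01/WATER.txt",            # placeholder path
        "simulations/basin01/WATERSimulation.xml",  # placeholder path
    ]
    process_water_files(file_list=example_files, settings=example_settings, print_data=False)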
def test_get_rankings_year_after_sofifa():
    year = 2008
    temp_folder = os.path.join(os.getcwd(), 'temp')
    csv_file = '{}-{}.csv'.format(year, year + 1)
    from_file = os.path.join(RAW_CLEANED_DATA_FILE_PATH, csv_file)
    to_file = os.path.join(temp_folder, csv_file)

    make_directory(temp_folder)
    get_rankings(from_file, to_file, '{}-12-31'.format(str(year + 1)), include_prediction=False)

    cmp_file = os.path.join(STANDINGS_PATH, csv_file)
    assert compare_csv(cmp_file, to_file)

    remove_directory(temp_folder)
def test_get_rankings_all():
    temp_folder = os.path.join(os.getcwd(), 'temp/file.csv')
    make_directory(temp_folder)

    from_year, to_year = 1993, 2019
    get_rankings_all(from_year, to_year, RAW_CLEANED_DATA_FILE_PATH, temp_folder)

    for year in range(from_year, to_year + 1):
        csv_file = '{}-{}.csv'.format(year, year + 1)
        created_file = os.path.join(temp_folder, csv_file)
        cmp_file = os.path.join(STANDINGS_PATH, csv_file)
        assert compare_csv(cmp_file, created_file)

    remove_directory(temp_folder)
def create_output_dirs_files(settings, is_sub_wateruse=False):
    """
    Create the output directories and files needed to process water use.

    Parameters
    ----------
    settings : dictionary
        Dictionary of user settings

    Returns
    -------
    info_dir : string
        string path to info directory
    ecoflow_dir : string
        string path to ecoflow directory
    oasis_dir : string
        string path to oasis directory
    info_file : string
        string path to info file

    Notes
    -----
    Uses settings set in user_settings.py
    """
    # create output directories
    info_dir = helpers.make_directory(path=settings["simulation_directory"], directory_name=settings["info_directory_name"])
    ecoflow_dir = helpers.make_directory(path=settings["simulation_directory"], directory_name=settings["ecoflow_directory_name"])
    oasis_dir = helpers.make_directory(path=settings["simulation_directory"], directory_name=settings["oasis_directory_name"])

    # path to info file
    if is_sub_wateruse:
        info_file = os.path.join(info_dir, settings["sub_wateruse_info_file_name"])
    else:
        info_file = os.path.join(info_dir, settings["wateruse_info_file_name"])

    # print input and output information
    helpers.print_input_output_info(
        input_dict={
            "simulation_directory": settings["simulation_directory"],
            "wateruse_prepend_name": settings["wateruse_prepend_name"],
            "wateruse_directory_name": settings["wateruse_directory_name"],
            "wateruse_info_file_name": settings["wateruse_info_file_name"],
            "wateruse_non_intersecting_file_name": settings["wateruse_non_intersecting_file_name"],
            "sub_wateruse_info_file_name": settings["sub_wateruse_info_file_name"],
        },
        output_dict={
            "info_dir": info_dir,
            "info_file": info_file,
            "ecoflow_dir": ecoflow_dir,
            "oasis_dir": oasis_dir
        })

    return info_dir, ecoflow_dir, oasis_dir, info_file
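# Usage sketch (not part of the original module): a minimal settings dictionary for
# create_output_dirs_files(). The keys mirror the ones read above; every value is a
# placeholder rather than a real user_settings.py default.
def _example_create_output_dirs_files():
    example_settings = {
        "simulation_directory": "simulations/my-batch-run",
        "info_directory_name": "waterapputils-info",
        "ecoflow_directory_name": "waterapputils-ecoflow",
        "oasis_directory_name": "waterapputils-oasis",
        "wateruse_prepend_name": "WATERUSE-",
        "wateruse_directory_name": "waterapputils-wateruse",
        "wateruse_info_file_name": "wateruse_info.txt",
        "wateruse_non_intersecting_file_name": "wateruse_non_intersecting_centroids.txt",
        "sub_wateruse_info_file_name": "sub_wateruse_info.txt",
    }
    info_dir, ecoflow_dir, oasis_dir, info_file = create_output_dirs_files(example_settings)
    return info_dir, ecoflow_dir, oasis_dir, info_file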
def write_oasis_file(file_list, dir_name, file_name):
    """Write an OASIS-formatted timeseries file of discharge + water use for each WATER.txt file in file_list."""
    for f in file_list:

        filedir, filename = helpers.get_file_info(f)

        oasis_dir = helpers.make_directory(path=filedir, directory_name=dir_name)

        helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": oasis_dir})

        waterapputils_logging.initialize_loggers(output_dir=oasis_dir)

        watertxt_data = watertxt.read_file(f)

        # write timeseries of discharge + water use for OASIS
        watertxt.write_timeseries_file(watertxt_data=watertxt_data, name="Discharge + Water Use", save_path=oasis_dir, filename="-".join([watertxt_data["stationid"], file_name]))

    waterapputils_logging.remove_loggers()
def create_output_dir(settings):
    """
    Create the output directory needed to generate maps.

    Parameters
    ----------
    settings : dictionary
        Dictionary of user settings

    Returns
    -------
    map_dir : string
        string path to map directory

    Notes
    -----
    Uses settings set in user_settings.py
    """
    # create output directories
    map_dir = helpers.make_directory(path=settings["simulation_directory"], directory_name=settings["map_directory_name"])

    # print input and output information
    helpers.print_input_output_info(
        input_dict={
            "simulation_directory": settings["simulation_directory"],
            "map_directory_name": settings["map_directory_name"],
        },
        output_dict={"map_dir": map_dir})

    return map_dir
def write_ecoflow_file_stationid(file_list, dir_name, file_name, parameter_name="Discharge + Water Use"):
    """
    Write a csv file containing a timeseries for a particular parameter contained in a WATER.txt file

    Parameters
    ----------
    file_list : list
        List of WATER.txt files to process
    dir_name : string
        String name for output directory
    file_name : string
        String name for output file
    parameter_name : string
        String name for a parameter contained in a WATER.txt file
    """
    for f in file_list:

        filedir, filename = helpers.get_file_info(f)

        ecoflow_dir = helpers.make_directory(path=filedir, directory_name=dir_name)

        helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": ecoflow_dir})

        waterapputils_logging.initialize_loggers(output_dir=ecoflow_dir)

        watertxt_data = watertxt.read_file(f)

        # write timeseries of discharge + water use for ecoflow program
        watertxt.write_timeseries_file_stationid(watertxt_data, name=parameter_name, save_path=ecoflow_dir, filename=file_name, stationid=watertxt_data["stationid"])

    waterapputils_logging.remove_loggers()
def write_ecoflow_file_drainageareaxml(file_list, dir_name, file_name):
    """
    Write a csv file containing a label (basin id number) and its corresponding area.

    Parameters
    ----------
    file_list : list
        List of WATERSimulation.xml files to process
    dir_name : string
        String name for output directory
    file_name : string
        String name for output file
    """
    area_data = {}
    for f in file_list:

        filedir, filename = helpers.get_file_info(f)

        ecoflow_dir = helpers.make_directory(path=filedir, directory_name=dir_name)

        helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": ecoflow_dir})

        waterapputils_logging.initialize_loggers(output_dir=ecoflow_dir)

        # read xml file
        waterxml_tree = waterxml.read_file(f)

        # get area from each region from the xml file and sum for a total area
        project, study, simulation = waterxml.get_xml_data(waterxml_tree=waterxml_tree)

        # get the project name which is the same as the stationid
        stationid = project["ProjName"]

        # get the area means for each region
        areas = waterxml.get_study_unit_areas(simulation_dict=simulation)

        # calculate total area
        total_area = waterxml.calc_total_study_unit_areas(areas)

        # fill area_data with total area
        area_data[stationid] = total_area

    # convert from km**2 to mi**2
    area_data = helpers.convert_area_values(area_data, in_units="km2", out_units="mi2")

    # write drainage area file for ecoflow program
    watertxt.write_drainagearea_file(area_data=area_data, save_path=ecoflow_dir, filename=file_name)

    waterapputils_logging.remove_loggers()
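# Stand-alone sketch (not part of the original module) of the unit conversion that
# helpers.convert_area_values() is assumed to perform above: 1 km**2 is about 0.386102 mi**2.
# The basin ids and areas are made up.
def _example_km2_to_mi2():
    sq_mi_per_sq_km = 0.386102
    area_data_km2 = {"01413500": 420.5, "01420500": 912.0}   # hypothetical basins
    area_data_mi2 = {basin_id: area * sq_mi_per_sq_km for basin_id, area in area_data_km2.items()}
    return area_data_mi2   # roughly {"01413500": 162.4, "01420500": 352.1}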
def test_get_current_fixtures():
    temp_folder = os.path.join(os.getcwd(), 'temp')
    temp_file = os.path.join(temp_folder, 'temp.csv')
    make_directory(temp_file)

    get_current_fixtures(temp_file)
    assert os.path.isfile(temp_file)

    df = pd.read_csv(temp_file)
    df_columns_list = list(df)
    assert 'Date' in df_columns_list
    assert 'HomeTeam' in df_columns_list
    assert 'AwayTeam' in df_columns_list
    assert 'FTHG' in df_columns_list
    assert 'FTAG' in df_columns_list
    assert 'FTR' in df_columns_list

    remove_directory(temp_folder)
def write_ecoflow_file_drainageareashp(file_list, dir_name, file_name, label_field, query_field):
    """
    Write a csv file containing a label (basin id number) and its corresponding area.
    Two methods to get the area from each respective shapefile:

    1. if shapefile(s) have an area field and the user specifies it in user_settings.py under the
    *basin_shapefile_area_field* variable, then get the area for each basin using the specified
    area field name (query_field)

    2. if shapefile(s) do not have an area field or the user does not specify it in user_settings.py,
    then calculate it using osgeo and label each basin according to *basin_shapefile_id_field*
    in user_settings.py

    Parameters
    ----------
    file_list : list
        List of files to process; files are shapefiles
    dir_name : string
        String name for output directory
    file_name : string
        String name for output file
    label_field : string
        String name of an id field (basin id number) to associate with a basin
    query_field : string
        String name of an area field

    Notes
    -----
    Uses settings set in user_settings.py
    """
    for f in file_list:

        filedir, filename = helpers.get_file_info(f)

        ecoflow_dir = helpers.make_directory(path=filedir, directory_name=dir_name)

        waterapputils_logging.initialize_loggers(output_dir=ecoflow_dir)

        helpers.print_input_output_info(input_dict={"input_file": f}, output_dict={"output_directory": ecoflow_dir})

        basin_shapefile = osgeo.ogr.Open(f)

        # get the areas for each region
        areas = spatialvectors.get_areas_dict(shapefile=basin_shapefile, id_field=label_field, query_field=query_field)

        # write drainage area file for ecoflow program
        watertxt.write_drainagearea_file(area_data=areas, save_path=ecoflow_dir, filename=file_name)

    waterapputils_logging.remove_loggers()
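# Rough sketch (not part of the original module) of what spatialvectors.get_areas_dict()
# is assumed to do: map each feature's id field to the value in its area field. The field
# names "STAID" and "da_sqmi" are hypothetical.
def _example_get_areas_dict(shapefile_path, id_field="STAID", query_field="da_sqmi"):
    datasource = osgeo.ogr.Open(shapefile_path)
    layer = datasource.GetLayer()
    areas = {}
    for feature in layer:
        areas[feature.GetField(id_field)] = float(feature.GetField(query_field))
    return areas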
def process_intersecting_centroids(intersecting_centroids, settings, ecoflow_dir, oasis_dir):
    """
    Apply water use data to a WATER \*.txt file. The new file created is saved to the same
    directory as the WATER \*.txt file.

    Parameters
    ----------
    intersecting_centroids : dictionary
        Dictionary containing lists of values for a particular field that were intersected by another shapefile.
    settings : dictionary
        Dictionary of user settings
    ecoflow_dir : string
        String path to directory that will contain output specific for ecoflow program
    oasis_dir : string
        String path to directory that will contain output specific for oasis

    Notes
    -----
    Uses settings set in user_settings.py
    """
    # create a file for the output
    for featureid, centroids in intersecting_centroids.iteritems():

        # get sum of the water use data
        if settings["wateruse_factor_file"]:
            total_wateruse_dict = wateruse.get_all_total_wateruse(wateruse_files=settings["wateruse_files"], id_list=centroids, wateruse_factor_file=settings["wateruse_factor_file"], in_cfs=True)
        else:
            total_wateruse_dict = wateruse.get_all_total_wateruse(wateruse_files=settings["wateruse_files"], id_list=centroids, wateruse_factor_file=None, in_cfs=True)

        # print monthly output in nice format to info file
        print("FeatureId: {}\n Centroids: {}\n Total Water Use:\n".format(featureid, centroids))
        helpers.print_monthly_dict(monthly_dict=total_wateruse_dict)

        # get the txt data file that has a parent directory matching the current featureid
        if settings["is_batch_simulation"]:
            path = os.path.join(settings["simulation_directory"], featureid)
        else:
            path = settings["simulation_directory"]

        # find the WATER.txt file
        watertxt_file = helpers.find_file(name=settings["water_text_file_name"], path=path)

        # get file info
        watertxt_dir, watertxt_filename = helpers.get_file_info(watertxt_file)

        # create an output directory
        output_dir = helpers.make_directory(path=watertxt_dir, directory_name=settings["wateruse_directory_name"])

        # initialize error logging
        waterapputils_logging.initialize_loggers(output_dir=output_dir)

        # read the txt
        watertxt_data = watertxt.read_file(watertxt_file)

        # apply water use
        watertxt_data = watertxt.apply_wateruse(watertxt_data, wateruse_totals=total_wateruse_dict)

        # write updated txt
        watertxt_with_wateruse_file = settings["wateruse_prepend_name"] + watertxt_filename
        watertxt.write_file(watertxt_data=watertxt_data, save_path=output_dir, filename=watertxt_with_wateruse_file)

        # plot
        updated_watertxt_file = os.path.join(output_dir, watertxt_with_wateruse_file)
        water_files_processing.process_water_files(file_list=[updated_watertxt_file], settings=settings, print_data=True)

        # write timeseries of discharge + water use for OASIS
        watertxt.write_timeseries_file(watertxt_data=watertxt_data, name=settings["ecoflow_parameter_name"], save_path=oasis_dir, filename="-".join([watertxt_data["stationid"], settings["oasis_file_name"]]))

        # write timeseries of discharge + water use for ecoflow program
        watertxt.write_timeseries_file_stationid(watertxt_data, name=settings["ecoflow_parameter_name"], save_path=ecoflow_dir, filename="", stationid=watertxt_data["stationid"])
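# Shape sketch (not part of the original module): intersecting_centroids is assumed to map a
# basin featureid to the list of water-use centroid ids that intersect it. The ids and
# directory arguments below are placeholders; settings is the user_settings.py dictionary.
def _example_process_intersecting_centroids(settings, ecoflow_dir, oasis_dir):
    example_intersecting_centroids = {
        "01413500": ["256", "257", "301"],   # hypothetical centroid ids
        "drb_basin_7": ["112"],
    }
    process_intersecting_centroids(example_intersecting_centroids, settings, ecoflow_dir, oasis_dir)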
def get_rankings(from_file, to_file, date=None, include_prediction=False, predicted_date_so_far=None, ranking_summary_file=None):
    if date:
        datet = datetime.strptime(date, '%Y-%m-%d')

    if not (from_file and to_file):
        raise ValueError("Error: get_rankings: Give a from_file/to_file pair")

    df = pd.read_csv(from_file)
    scores = dict()

    for _, row in df.iterrows():
        if type(row['Date']) is float:
            continue
        if date and datetime.strptime(row['Date'], '%Y-%m-%d') > datet:
            break
        # That means this row is a prediction value
        if not include_prediction and row['FTHG'] == 0 and row['FTAG'] == 0 and row['FTR'] != 'D':
            break
        # Meaning this game is not played and not predicted yet
        if row['FTR'] is np.nan:
            break

        home = row['HomeTeam']
        away = row['AwayTeam']

        if home not in scores:
            scores[home] = {'match_played': 0, 'points': 0, 'goal_diff': 0, 'win': 0}
        if away not in scores:
            scores[away] = {'match_played': 0, 'points': 0, 'goal_diff': 0, 'win': 0}

        scores[home]['match_played'] += 1
        scores[away]['match_played'] += 1

        match_goal_diff = row['FTHG'] - row['FTAG']
        scores[home]['goal_diff'] += match_goal_diff
        scores[away]['goal_diff'] -= match_goal_diff

        if row['FTR'] == 'H':
            scores[home]['points'] += 3
            scores[home]['win'] += 1
        elif row['FTR'] == 'A':
            scores[away]['points'] += 3
            scores[away]['win'] += 1
        else:
            scores[home]['points'] += 1
            scores[away]['points'] += 1

    teams = sorted(scores, key=lambda k: scores[k]['points'], reverse=True)

    points, goal_diff, win_rate = [], [], []
    for name in teams:
        val = scores[name]
        points.append(val['points'])
        goal_diff.append(val['goal_diff'])
        win_rate.append(val['win'] / val['match_played'])

    df = pd.DataFrame(list(zip(teams, points, goal_diff, win_rate)), columns=['Team', 'Points', 'Goal_Diff', 'Win_Rate'])
    make_directory(to_file)
    df.to_csv(to_file, index=False)

    if include_prediction and predicted_date_so_far and ranking_summary_file:
        round_df = pd.DataFrame(list(zip(teams, points)), columns=['Team', predicted_date_so_far])
        round_df.set_index('Team', inplace=True)
        round_df = round_df.transpose()
        round_df.index.name = 'Date'

        if os.path.isfile(ranking_summary_file):
            summary_df = pd.read_csv(ranking_summary_file)
            summary_df.set_index('Date', inplace=True)
            summary_df = pd.concat([summary_df, round_df], sort=False)
            summary_df.to_csv(ranking_summary_file)
        else:
            round_df.to_csv(ranking_summary_file)

    return teams[0]
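# Usage sketch (not part of the original module): build the table for one season up to a
# given date, ignoring rows that are only predictions. The csv paths are placeholders and
# assume the raw cleaned file has the Date/HomeTeam/AwayTeam/FTHG/FTAG/FTR columns read above.
def _example_get_rankings():
    leader = get_rankings(from_file='data/raw_cleaned/2018-2019.csv',
                          to_file='data/standings/2018-2019.csv',
                          date='2019-01-01',
                          include_prediction=False)
    return leader   # team name at the top of the table on that date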
def process_cmp(file_list, settings, print_data=True):
    """
    Compare two WATER output files (.txt or .xml) according to the user settings.

    Parameters
    ----------
    file_list : list
        List of files to parse, process, and plot.
    settings : dictionary
        Dictionary of user settings
    print_data : bool
        If True, print the parsed data to the console.
    """
    print("Comparing WATER files ...\n")

    water_file1 = file_list[0]
    water_file2 = file_list[1]

    filedir1, filename1 = helpers.get_file_info(water_file1)
    filedir2, filename2 = helpers.get_file_info(water_file2)

    ext1 = os.path.splitext(filename1)[1]
    ext2 = os.path.splitext(filename2)[1]

    assert ext1 == ".txt" or ext1 == ".xml", "Can not process file {}. File extension {} is not .txt or .xml".format(filename1, ext1)
    assert ext2 == ".txt" or ext2 == ".xml", "Can not process file {}. File extension {} is not .txt or .xml".format(filename2, ext2)

    if ext1 == ".txt" and ext2 == ".txt":
        output_dir = helpers.make_directory(path=filedir1, directory_name=settings["watertxt_directory_name"])

        helpers.print_input_output_info(input_dict={"input_file_1": water_file1, "input_file_2": water_file2}, output_dict={"output_directory": output_dir})

        waterapputils_logging.initialize_loggers(output_dir=output_dir)

        watertxt_data1 = watertxt.read_file(water_file1)
        watertxt_data2 = watertxt.read_file(water_file2)

        watertxt_viewer.plot_watertxt_comparison(watertxt_data1, watertxt_data2, save_path=output_dir)

        if print_data:
            watertxt_viewer.print_watertxt_data(watertxt_data1)
            watertxt_viewer.print_watertxt_data(watertxt_data2)

    elif ext1 == ".xml" and ext2 == ".xml":
        output_dir = helpers.make_directory(path=filedir1, directory_name=settings["waterxml_directory_name"])

        helpers.print_input_output_info(input_dict={"input_file_1": water_file1, "input_file_2": water_file2}, output_dict={"output_directory": output_dir})

        waterapputils_logging.initialize_loggers(output_dir=output_dir)

        waterxml_data1 = waterxml.read_file(water_file1)
        waterxml_data2 = waterxml.read_file(water_file2)

        waterxml_viewer.plot_waterxml_timeseries_comparison(waterxml_data1, waterxml_data2, save_path=output_dir)

        if print_data:
            waterxml_viewer.print_waterxml_data(waterxml_data1)
            waterxml_viewer.print_waterxml_data(waterxml_data2)

    else:
        print("Can not process files {} and {}. File extensions {} and {} both need to be .txt or .xml".format(filename1, filename2, ext1, ext2))

    waterapputils_logging.remove_loggers()
def process_intersecting_tiles(intersecting_tiles, settings, gcm_delta_dir):
    """
    Apply GCM delta factors to a WATER simulation. The new files created are saved to the
    same directory as the WATERSimulation.xml file.

    Parameters
    ----------
    intersecting_tiles : dictionary
        Dictionary containing lists of values for a particular field that were intersected by another shapefile.
    settings : dictionary
        Dictionary of user settings
    gcm_delta_dir : string
        string path to gcm delta directory

    Notes
    -----
    Uses settings set in user_settings.py
    """
    # create a file for the output
    for featureid, tiles in intersecting_tiles.iteritems():

        # get monthly average gcm delta values
        deltas_data_list, deltas_avg_dict = deltas.get_deltas(delta_files=settings["gcm_delta_files"], tiles=tiles)

        # print monthly output in nice format to info file
        print("FeatureId: {}\n Tiles: {}\n Average GCM Deltas:\n".format(featureid, tiles))
        for key in deltas_avg_dict.keys():
            print(" {}\n".format(key))
            helpers.print_monthly_dict(monthly_dict=deltas_avg_dict[key])

        # get the txt data file that has a parent directory matching the current featureid
        if settings["is_batch_simulation"]:
            path = os.path.join(settings["simulation_directory"], featureid)
        else:
            path = settings["simulation_directory"]

        # find the WATERSimulation.xml and WATER.txt files
        waterxml_file = helpers.find_file(name=settings["water_database_file_name"], path=path)
        watertxt_file = helpers.find_file(name=settings["water_text_file_name"], path=path)

        # get file info
        waterxml_dir, waterxml_filename = helpers.get_file_info(waterxml_file)
        watertxt_dir, watertxt_filename = helpers.get_file_info(watertxt_file)

        # create an output directory
        output_dir = helpers.make_directory(path=waterxml_dir, directory_name=settings["gcm_delta_directory_name"])

        # initialize error logging
        waterapputils_logging.initialize_loggers(output_dir=output_dir)

        # read the xml file
        waterxml_tree = waterxml.read_file(waterxml_file)
        watertxt_data = watertxt.read_file(watertxt_file)

        # apply gcm delta
        for key, value in deltas_avg_dict.iteritems():
            if key == "Ppt":
                waterxml.apply_factors(waterxml_tree=waterxml_tree, element="ClimaticPrecipitationSeries", factors=deltas_avg_dict[key])
            elif key == "Tmax":
                waterxml.apply_factors(waterxml_tree=waterxml_tree, element="ClimaticTemperatureSeries", factors=deltas_avg_dict[key])
            elif key == "PET":
                watertxt.apply_factors(watertxt_data, name="PET", factors=deltas_avg_dict[key], is_additive=False)

        # update the project name in the updated xml
        project = waterxml.create_project_dict()
        project = waterxml.fill_dict(waterxml_tree=waterxml_tree, data_dict=project, element="Project", keys=project.keys())
        waterxml.change_element_value(waterxml_tree=waterxml_tree, element="Project", child="ProjName", new_value=settings["gcm_delta_prepend_name"] + project["ProjName"])

        # write updated xml
        waterxml_with_gcm_delta_file = settings["gcm_delta_prepend_name"] + waterxml_filename
        waterxml.write_file(waterxml_tree=waterxml_tree, save_path=output_dir, filename=waterxml_with_gcm_delta_file)

        # write the pet timeseries file
        watertxt.write_timeseries_file(watertxt_data, name="PET", save_path=output_dir, filename=settings["pet_timeseries_file_name"])

        # plot
        updated_waterxml_file = os.path.join(output_dir, waterxml_with_gcm_delta_file)
        water_files_processing.process_water_files(file_list=[updated_waterxml_file], settings=settings, print_data=False)
        water_files_processing.process_cmp(file_list=[updated_waterxml_file, waterxml_file], settings=settings, print_data=False)

        # plot the gcm deltas
        for deltas_data in deltas_data_list:
            deltas_viewer.plot_deltas_data(deltas_data=deltas_data, save_path=helpers.make_directory(path=gcm_delta_dir, directory_name=settings["gcm_delta_directory_name"]))
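# Shape sketch (not part of the original module): intersecting_tiles is assumed to map a basin
# featureid to the list of GCM tile ids it intersects, mirroring the water-use case above.
# The tile ids and the gcm_delta_dir path are invented; settings is the user_settings.py dictionary.
def _example_process_intersecting_tiles(settings):
    example_intersecting_tiles = {
        "01413500": ["31", "32"],   # hypothetical GCM tile ids
        "drb_basin_7": ["44"],
    }
    process_intersecting_tiles(example_intersecting_tiles, settings, gcm_delta_dir="simulations/my-batch-run/gcm-delta")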
def predict_next_round(clf, final_path, current_raw_cleaned_path, statistics=False, stat_path=None, first=True):
    # First indicates whether the one being predicted is the upcoming round
    # Load final data csv
    df = pd.read_csv(final_path)

    # Get the row count of the dataframe
    len_df = df.shape[0]

    # Normalize each column and remove rows that should not be predicted yet
    df = prepare_data(df, drop_na=False)
    df = df.loc[(df['FTR'] != 'H') & (df['FTR'] != 'D') & (df['FTR'] != 'A')]
    df = df.drop(columns=['FTR'])

    if statistics:
        if stat_path is not None:
            make_directory(stat_path)
        else:
            raise ValueError("specify 'stat_path' to save prediction result. Exiting...")

    if len(df) > 0:
        df_indices = [x - len_df for x in df.index]
        prediction = clf.predict(df).tolist()
        prediction_probability = clf.predict_proba(df).tolist()
        clf_classes = clf.classes_

        df_to_predict = pd.read_csv(current_raw_cleaned_path)
        len_df = df_to_predict.shape[0]

        print("{:20} {:20} {:20} {}".format("Home", "Away", "Predict", "Probability"))
        for (index, result, pred_prob) in zip(df_indices, prediction, prediction_probability):
            HT = df_to_predict.at[index + len_df, 'HomeTeam']
            AT = df_to_predict.at[index + len_df, 'AwayTeam']
            date_so_far = df_to_predict.at[index + len_df, 'Date']

            df_to_predict.at[index + len_df, 'FTR'] = result
            df_to_predict.at[index + len_df, 'FTHG'] = 0
            df_to_predict.at[index + len_df, 'FTAG'] = 0
            for (outcome, prob) in zip(clf_classes, pred_prob):
                df_to_predict.at[index + len_df, 'prob_' + outcome] = prob

            print("{:20} {:20} {:20} {}".format(HT, AT, HT if result == "H" else AT, max(pred_prob)))

        if statistics:
            if first:
                if os.path.exists(stat_path):
                    os.remove(stat_path)
                df_to_predict.to_csv(stat_path, index=False)
            else:
                if os.path.isfile(stat_path):
                    stat_df = pd.read_csv(stat_path)
                    stat_df.update(df_to_predict)
                    stat_df.to_csv(stat_path, index=False)
                else:
                    raise ValueError('FATAL ERROR: either set first=True, or feed stat_path.')

        df_to_predict = df_to_predict.drop(columns=['prob_' + outcome for outcome in clf_classes])
        df_to_predict.to_csv(current_raw_cleaned_path, index=False)

        return True, date_so_far
    else:
        print("There are no more games to make prediction")
        return False, None
def get_clf(final_file_path, model_confidence_csv_path, clf_file, recalculate=True):
    if not recalculate and os.path.isfile(clf_file):
        return joblib.load(clf_file), None, None

    # First load the data from csv file
    data = pd.read_csv(final_file_path)

    # Drop columns that are not needed and normalize each column
    data = prepare_data(data, drop_na=True)
    data = data.loc[(data['FTR'] == 'H') | (data['FTR'] == 'D') | (data['FTR'] == 'A')]

    # Divide data into features and label
    X_all = data.drop(columns=['FTR'])
    y_all = data['FTR']

    # List of Classifiers that we are going to run
    classifiers = [
        # Logistic Regressions
        LogisticRegression(),
        # Best param in this grid search
        LogisticRegression(penalty='l2', solver='newton-cg', multi_class='ovr', C=0.1, warm_start=True),
        LogisticRegression(penalty='l2', solver='lbfgs', multi_class='multinomial', C=0.4, warm_start=False),
        # SVC
        SVC(probability=True),
        SVC(C=0.3, class_weight=None, decision_function_shape='ovo', degree=1, kernel='rbf',
            probability=True, shrinking=True, tol=0.0005),
        SVC(C=0.28, class_weight=None, decision_function_shape='ovo', degree=1, kernel='rbf',
            probability=True, shrinking=True, tol=0.0002),
        # XGBoost
        xgb.XGBClassifier(),
        xgb.XGBClassifier(learning_rate=0.01, n_estimators=1000, max_depth=2, min_child_weight=5,
                          gamma=0, subsample=0.8, colsample_bytree=0.7, scale_pos_weight=0.8,
                          reg_alpha=1e-5, booster='gbtree', objective='multi:softprob'),
        # KNeighborsClassifier(),
        # RandomForestClassifier(),
        # GaussianNB(),
        # DecisionTreeClassifier(),
        # GradientBoostingClassifier(),
        # LinearSVC(),
        # SGDClassifier()
    ]

    # # Example of how to grid search classifiers
    # # Logistic Regression
    # clf_L = LogisticRegression()
    # parameters_L = {'penalty': ['l2'],
    #                 'solver': ['lbfgs', 'newton-cg', 'sag'],
    #                 'multi_class': ['ovr', 'multinomial'],
    #                 'C': [x * 0.1 + 0.1 for x in range(10)],
    #                 'warm_start': [True, False],
    #                 'fit_intercept': [True, False],
    #                 'class_weight': ['balanced', None]}
    # f1_scorer_L = make_scorer(f1_score, labels=['H','D','A'], average='micro')
    # clf_L = get_grid_clf(clf_L, f1_scorer_L, parameters_L, X_all, y_all)
    # classifiers.append(clf_L)

    # # SVC
    # clf_L = SVC()
    # parameters_L = {
    #     'C': [x * 0.01 + 0.27 for x in range(5)],
    #     'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    #     'degree': [x + 1 for x in range(3)],
    #     'shrinking': [True, False],
    #     'tol': [x * 0.0005 + 0.0005 for x in range(3)],
    #     'class_weight': ['balanced', None],
    #     'decision_function_shape': ['ovo', 'ovr']
    # }
    # f1_scorer_L = make_scorer(f1_score, labels=['H','D','A'], average='micro')
    # clf_L = get_grid_clf(clf_L, f1_scorer_L, parameters_L, X_all, y_all)
    # classifiers.append(clf_L)

    # # XGBoost
    # clf_L = xgb.XGBClassifier()
    # parameters_L = {
    #     'learning_rate': [0.01],
    #     'n_estimators': [1000],
    #     'max_depth': [2],
    #     'min_child_weight': [5],
    #     'gamma': [0],
    #     'subsample': [0.8],
    #     'colsample_bytree': [0.7],
    #     'scale_pos_weight': [0.8],
    #     'reg_alpha': [1e-5],
    #     'booster': ['gbtree'],
    #     'objective': ['multi:softprob']
    # }
    # f1_scorer_L = make_scorer(f1_score, labels=['H','D','A'], average='micro')
    # clf_L = get_grid_clf(clf_L, f1_scorer_L, parameters_L, X_all, y_all)
    # classifiers.append(clf_L)

    # We are going to record accuracies of each classifier prediction iteration
    len_classifiers = len(classifiers)
    result = [[] for _ in range(len_classifiers)]
    y_results = [[] for _ in range(len_classifiers + 1)]

    # Using 10-fold cross validation (Dividing the data into sub groups (90% to fit, 10% to test), and run
    # prediction with each classifiers using the sub groups as a dataset)
    split = 10
    kf = KFold(n_splits=split, shuffle=True)

    for split_index, (train_index, test_index) in enumerate(kf.split(X_all)):
        print("Processing {}/{} of KFold Cross Validation...".format(split_index + 1, split))
        X_train, X_test = X_all.iloc[train_index], X_all.iloc[test_index]
        y_train, y_test = y_all.iloc[train_index], y_all.iloc[test_index]
        y_results[len_classifiers] += y_test.tolist()

        for index, clf in enumerate(classifiers):
            print("KFold: {}/{}. clf_index: {}/{}.".format(split_index + 1, split, index + 1, len(classifiers)))
            confidence, predicted_result = train_predict(clf, X_train, y_train, X_test, y_test)
            result[index].append(confidence)
            y_results[index] += predicted_result.tolist()

    # Make a dictionary of average accuracies for each classifier
    avg_dict, best_clf, best_clf_average = process_print_result(classifiers, result)

    # Put the result into csv file
    if os.path.isfile(model_confidence_csv_path):
        df = pd.read_csv(model_confidence_csv_path)
        newdf = pd.DataFrame(avg_dict, index=[df.shape[1]])
        df = pd.concat([df, newdf], ignore_index=True, sort=False)
    else:
        make_directory(model_confidence_csv_path)
        df = pd.DataFrame(avg_dict, index=[0])
    df.to_csv(model_confidence_csv_path, index=False)

    # Saves the classifier using joblib module
    joblib.dump(best_clf, clf_file)

    # Return the best classifier
    return best_clf, y_results, best_clf_average
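# Usage sketch (not part of the original module): train or reload the best classifier and then
# predict the upcoming round with predict_next_round() defined above. All file paths are placeholders.
def _example_get_clf_and_predict():
    clf, y_results, best_avg = get_clf(final_file_path='data/final.csv',
                                       model_confidence_csv_path='data/model_confidence.csv',
                                       clf_file='models/best_clf.joblib',
                                       recalculate=False)
    predicted, date_so_far = predict_next_round(clf,
                                                final_path='data/final.csv',
                                                current_raw_cleaned_path='data/raw_cleaned/2019-2020.csv',
                                                statistics=False)
    return predicted, date_so_far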