Example #1
0
def load_data(data_dir: WindowsPath,
              filename: str = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the data, either from a cached file or from the Excel workbooks.

    Keyword arguments
    data_dir -- a pathlib.WindowsPath object to the DATA folder
    filename -- name of a file inside data_dir; if None (or empty) then
        load the original two Excel files

    Returns
    Without a filename: the tuple (cl_df, wf_df) holding the first sheet of
    'CL_20190823.xlsx' and of 'WF_20190826.xlsx' as DataFrames.
    With a filename: the DataFrame parsed from a csv file, otherwise whatever
    object was stored in the file, which is then read as a pickle.
    """

    if filename:
        path = data_dir.joinpath(filename)

        # Look at the extensions of the file name itself instead of searching
        # the whole string: a '.csv' inside a directory name or stem must not
        # select the csv reader, and an upper-case '.CSV' must. Checking all
        # suffixes keeps names such as 'data.csv.gz' on the csv path.
        if any(suffix.lower() == '.csv' for suffix in path.suffixes):
            return pd.read_csv(path)

        # Else assume it's a pickle file.
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(path, 'rb') as infile:
            return pickle.load(infile)

    wb = None
    try:
        wb = xw.Book(str(data_dir.joinpath('CL_20190823.xlsx')))
        cl_df = wb.sheets[0].used_range.options(pd.DataFrame,
                                                index=False,
                                                header=True).value

        wb = xw.Book(str(data_dir.joinpath('WF_20190826.xlsx')))
        wf_df = wb.sheets[0].used_range.options(pd.DataFrame,
                                                index=False,
                                                header=True).value
    finally:
        # Quit Excel even when a workbook fails to open or to convert, so a
        # failed load does not leave the application running.
        if wb is not None:
            wb.app.quit()

    return (cl_df, wf_df)
Example #2
0
def request_api(student_comments: pd.Series,
                endpoint: str,
                num_batches: int = 50,
                save: bool = True,
                folder: WindowsPath = None) -> pd.Series:
    """Sends student comments to the LUIS.ai API in batches and optionally
    saves the intermediate results into the OUTPUT folder

    Keyword arguments
    student_comments -- the pd.Series that contains the student comments in
        natural language
    endpoint -- the luis endpoint; each comment is appended to it, so it is
        expected to end with the query parameter (e.g. '...&q=')
    num_batches -- the number of batches into which the comments would be
        grouped and sent to the api
    save -- a boolean that saves the raw response of every batch to local
        disk as a pickle named 'luis_result_NNNN'
    folder -- a location to which to save the response data, defaults to
        /OUTPUT/LUIS/ under the current working directory

    Returns
    A pd.Series holding one response object per comment, indexed like
    student_comments.
    """
    from urllib.parse import quote

    if save:
        if folder is None:
            folder = Path.cwd().joinpath('OUTPUT').joinpath('LUIS')
        # Make sure the target exists before the first batch is written.
        folder.mkdir(parents=True, exist_ok=True)

    def query_luis(comment):
        # Percent-encode the comment so characters such as '&', '#' or '+'
        # stay part of the query text instead of altering the URL structure.
        encoded = quote(str(comment), safe='')
        return requests.get(f'{endpoint}{encoded}')

    # Split integer positions rather than the Series itself so that every
    # batch is a pd.Series slice that keeps its original index.
    positions = np.arange(len(student_comments))
    responses = []

    for i, batch_positions in enumerate(np.array_split(positions,
                                                       num_batches)):
        print(f'Processing batch {i + 1} of {num_batches}:')

        luis_result = student_comments.iloc[batch_positions].apply(query_luis)
        responses.append(luis_result)

        if save:
            # Saving the results to pickle; the file index stays zero-based
            # so that names match files written by earlier runs.
            filename = f'luis_result_{str(i).zfill(4)}'
            luis_result.to_pickle(folder.joinpath(filename))

            print(f'Saved to {folder.joinpath(filename)}.')

    return pd.concat(responses)
Example #3
0
def path_settings(directory):
    """Return the settings folder for `directory`, creating it if missing.

    On Windows the folder lives under the path stored in the APPDATA
    environment variable; on every other platform it lives under '/etc'.

    directory -- name (or relative path) of the folder to create
    """
    if system() == 'Windows':
        # NOTE(review): os.getenv returns None when APPDATA is unset, which
        # WindowsPath rejects -- confirm callers always run with it defined.
        base = WindowsPath(os.getenv('APPDATA'))
    else:
        base = PosixPath('/etc')

    dest = base.joinpath(directory)

    # parents/exist_ok make the call safe to repeat and allow nested names.
    dest.mkdir(parents=True, exist_ok=True)

    return dest
Example #4
0
def load_config(filename: str, config_dir: WindowsPath) -> pd.DataFrame:
    """Load csv config files from the /CONFIG/ folder.

    Keyword arguments
    filename -- name of the config file to be loaded, it is assumed to be of
        csv format; '.csv' is appended when the name has no csv extension
    config_dir -- a pathlib.WindowsPath object to the CONFIG folder.

    Returns
    The parsed config file as a pd.DataFrame.
    """

    config_path = config_dir.joinpath(filename)

    # Inspect the extensions of the file name itself (case-insensitively)
    # rather than searching the whole string, so that 'FOO.CSV' is not turned
    # into 'FOO.CSV.csv' and a '.csv' inside a directory name does not
    # suppress the extension.
    has_csv_suffix = any(suffix.lower() == '.csv'
                         for suffix in config_path.suffixes)
    if not has_csv_suffix:
        config_path = config_dir.joinpath(filename + '.csv')

    return pd.read_csv(config_path)
Example #5
0
def load_column(path: WindowsPath, filename: str):
    """Unpickle the object stored at path/filename and name it after the file

    Keyword arguments
    path -- a WindowsPath object pointing at the folder that holds the file
    filename -- a string containing the name of the file; it also becomes
        the `name` attribute of the returned object
    """

    source = path.joinpath(filename)

    # NOTE(security): unpickling can run arbitrary code; only read files
    # that come from a trusted source.
    with source.open('rb') as handle:
        column = pickle.load(handle)

    column.name = filename
    return column
Example #6
0
def save_object(obj, filename: str, output_dir: WindowsPath):
    """Pickle obj into the file named filename inside output_dir

    Keyword arguments
    obj -- the object to be saved
    filename -- the name of the file
    output_dir -- a pathlib.WindowsPath object for the folder that receives
        the pickle file
    """

    target = output_dir.joinpath(filename)
    print(f'Pickling to {target}.')

    with target.open('wb') as sink:
        pickle.dump(obj, sink)
Example #7
0
def get_luis_url(folder: WindowsPath = None) -> str:
    """Build the luis api query url from the 'luis_keys.csv' file.

    The csv needs a 'key' and a 'value' column with rows for 'endpoint',
    'app_id' and 'subscription_key'. These can be obtained from the luis.ai
    site:
        https://www.luis.ai/applications/{app_id}/versions/0.1/manage/endpoints

    Keyword arguments
    folder -- the folder that contains 'luis_keys.csv', defaults to the
        CONFIG folder under the current working directory

    Returns
    The query url, ending in '&q=' so the query text can be appended to it.
    """
    config_dir = Path.cwd().joinpath('CONFIG') if folder is None else folder

    settings = pd.read_csv(config_dir.joinpath('luis_keys.csv'),
                           index_col='key')

    endpoint, app_id, primary_key = (
        settings.loc[row, 'value']
        for row in ('endpoint', 'app_id', 'subscription_key')
    )

    base_url = f'{endpoint}luis/v2.0/apps/{app_id}'
    query = f'?verbose=true&timezoneOffset=0&subscription-key={primary_key}&q='

    return base_url + query
Example #8
0
def save_sets(X_train: pd.DataFrame = None,
              y_train: pd.Series = None,
              X_val: pd.DataFrame = None,
              y_val: pd.Series = None,
              X_test: pd.DataFrame = None,
              y_test: pd.Series = None,
              path: WindowsPath = None):
    """Save the different sets locally as csv files

    Each set that is not None is written, without its index, to
    '<name>.csv' inside `path` (e.g. 'X_train.csv').

    Parameters
    ----------
    X_train: pd.DataFrame
        Features for the training set
    y_train: pd.Series
        Target for the training set
    X_val: pd.DataFrame
        Features for the validation set
    y_val: pd.Series
        Target for the validation set
    X_test: pd.DataFrame
        Features for the testing set
    y_test: pd.Series
        Target for the testing set
    path : pathlib.Path or str
        Path to the folder where the sets will be saved (default: '../data/processed/')

    Returns
    -------
    None
    """
    from pathlib import Path

    # Honour the documented default and accept plain strings; previously
    # both cases failed because `joinpath` was called on None / str.
    if path is None:
        path = Path('../data/processed/')
    elif isinstance(path, str):
        path = Path(path)

    datasets = (
        ('X_train', X_train),
        ('y_train', y_train),
        ('X_val', X_val),
        ('y_val', y_val),
        ('X_test', X_test),
        ('y_test', y_test),
    )

    for name, data in datasets:
        if data is not None:
            data.to_csv(path.joinpath(f'{name}.csv'), index=False)