def __extract_multiple_features(feature1, feature2, filename):
    """
    Creates two files (one with unprocessed, and one with singular feature) containing questions that has the features

    Arguments:
        feature1: The first feature to look for
        feature2: The second feature to look for
        filename (str): File containing the features

    """
    new_index = old_index = 0
    up_name = "UP_" + filename.strip()
    path = const.FILEPATH_TRAINING_DATA + FILENAME_START
    unprocessed_df = load_training_data(path, False, exclude_site_tags=True)
    feature_df = DataFrame.from_csv(__get_filename(const.FILEPATH_FEATURE_DETECTOR, filename))
    new_up_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    new_feat_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    for question in feature_df[const.QUESTION_TEXT_KEY]:
        if feature1 in question:
            new_feat_dataframe.loc[new_index] = feature_df.loc[old_index].copy()
            new_up_dataframe.loc[new_index] = unprocessed_df.loc[old_index].copy()
            new_index += 1
        elif feature2 in question:
            new_feat_dataframe.loc[new_index] = feature_df.loc[old_index].copy()
            new_up_dataframe.loc[new_index] = unprocessed_df.loc[old_index].copy()
            new_index += 1
        old_index += 1
    new_up_dataframe.to_csv(__get_filename(NEW_PATH, up_name), encoding='utf-8')
    new_feat_dataframe.to_csv(__get_filename(NEW_PATH, filename), encoding='utf-8')
def __extract_single_features(feature):
    """
    Creates two files (one with unprocessed, and one with singular feature) containing questions that has the feature

    Arguments:
        feature: The feature(s) to look for

    Returns:
         tuple (pandas.DataFrame, pandas.DataFrame): Tuple that contains the dataframe with
          updated unprocessed questions (those that contains the given feature), and the
          other dataframe that has the features added to its question text
    """
    up_name = "UP_" + feature.strip()
    new_index = old_index = 0
    path = const.FILEPATH_TRAINING_DATA + FILENAME_START
    unprocessed_df = load_training_data(path, False, exclude_site_tags=True)
    feature_df = DataFrame.from_csv(__get_filename(const.FILEPATH_FEATURE_DETECTOR, feature))
    new_up_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    new_feat_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    for question in feature_df[const.QUESTION_TEXT_KEY]:
        if feature in question:
            new_feat_dataframe.loc[new_index] = feature_df.loc[old_index].copy()
            new_up_dataframe.loc[new_index] = unprocessed_df.loc[old_index].copy()
            new_index += 1
        old_index += 1
    new_up_dataframe.to_csv(__get_filename(NEW_PATH, up_name), encoding='utf-8')
    new_feat_dataframe.to_csv(__get_filename(NEW_PATH, feature), encoding='utf-8')
def __extract_features_from_list(feature_list, filename, up_name):
    """
    Compares questions against a list of features to select only those that contain them

    Arguments:
        feature_list: The feature(s) to look for
        filename (str): Name of file
        up_name (str): Name of the filtered unprocessed data set

    Returns:
         tuple (pandas.DataFrame, pandas.DataFrame): Tuple that contains the dataframe with
          updated unprocessed questions (those that contains the given feature), and the
          other dataframe that has the features added to its question text
    """
    new_index = old_index = 0
    path = const.FILEPATH_TRAINING_DATA + FILENAME_START
    unprocessed_df = load_training_data(path, False, exclude_site_tags=True)
    feature_df = DataFrame.from_csv(const.FILEPATH_TRAINING_DATA + filename + FILE_ENDING)
    new_up_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    new_feat_dataframe = DataFrame(columns=unprocessed_df.columns.values)
    counter = 0
    found_feature = False
    for question in feature_df[const.QUESTION_TEXT_KEY]:
        while not found_feature and counter < len(feature_list):
            feature = feature_list[counter]
            if feature in question:
                new_feat_dataframe.loc[new_index] = feature_df.loc[old_index].copy()
                new_up_dataframe.loc[new_index] = unprocessed_df.loc[old_index].copy()
                new_index += 1
                found_feature = True
            counter += 1
        counter = 0
        old_index += 1
        found_feature = False
    new_up_dataframe.to_csv(__get_filename(NEW_PATH, up_name), encoding='utf-8')
    filename = NEW_PATH + filename + FILE_ENDING
    new_feat_dataframe.to_csv(filename, encoding='utf-8')