if not (isinstance(_arg1[0], str)):
        raise TypeError

    supportedLibraries = {"nltk", "textblob"}

    library = library.lower()
    if library not in supportedLibraries:
        raise ValueError

    scores = []
    if library == "nltk":
        sid = SentimentIntensityAnalyzer()
        for text in _arg1:
            sentimentResults = sid.polarity_scores(text)
            score = sentimentResults["compound"]
            scores.append(score)
    elif library == "textblob":
        for text in _arg1:
            currScore = TextBlob(text)
            scores.append(currScore.sentiment.polarity)
    return scores


if __name__ == "__main__":
    # Script entry point: pass SentimentAnalysis, its model name and its
    # description to setup_utils.deploy_model.
    model_description = (
        "Returns a sentiment score between -1 and 1 for "
        "a given string"
    )
    setup_utils.deploy_model(
        "Sentiment Analysis", SentimentAnalysis, model_description
    )
Esempio n. 2
0
    '''
    # one sample test with mean
    if len(_arg2) == 1:
        test_stat, p_value = stats.ttest_1samp(_arg1, _arg2)
        return p_value
    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    elif len(set(_arg2)) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if not (len(_arg1) == len(_arg2)):
            raise ValueError
        class1, class2 = set(_arg2)
        sample1 = []
        sample2 = []
        for i in range(len(_arg1)):
            if _arg2[i] == class1:
                sample1.append(_arg1[i])
            else:
                sample2.append(_arg1[i])
        test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value
    # arg1 is a sample and arg2 is a sample
    else:
        test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
        return p_value


if __name__ == '__main__':
    # Script entry point: pass ttest, its model name and its description
    # to setup_utils.deploy_model.
    setup_utils.deploy_model('ttest', ttest,
                             'Returns the p-value from a t-test')
Esempio n. 3
0
                      '25 unique values')
                raise ValueError
            integerEncoded = labelEncoder.fit_transform(array(col))
            integerEncoded = integerEncoded.reshape(len(col), 1)
            oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
            transformedMatrix = oneHotEncoded.transpose()
            encodedCols += list(transformedMatrix)

    dataDict = {}
    for i in range(len(encodedCols)):
        dataDict[f'col{1 + i}'] = list(encodedCols[i])

    if component <= 0 or component > len(dataDict):
        print('ERROR: Component specified must be >= 0 and '
              '<= number of arguments')
        raise ValueError

    df = pd.DataFrame(data=dataDict, dtype=float)
    scale = StandardScaler()
    scaledData = scale.fit_transform(df)

    pca = sklearnPCA()
    pcaComponents = pca.fit_transform(scaledData)

    return pcaComponents[:, component - 1].tolist()


if __name__ == '__main__':
    # Script entry point: pass PCA, its model name and its description
    # to setup_utils.deploy_model.
    model_description = 'Returns the specified principal component'
    setup_utils.deploy_model('PCA', PCA, model_description)
Esempio n. 4
0
import scipy.stats as stats
from tabpy.models.utils import setup_utils


def anova(_arg1, _arg2, *_argN):
    """
    Run a one-way ANOVA over two or more groups and return the p-value.

    ANOVA is a statistical hypothesis test that is used to compare
    two or more group means for equality. For more information on
    the function and how to use it please refer to tabpy-tools.md

    Args:
        _arg1: First group of values (a sized, indexable sequence).
        _arg2: Second group of values.
        *_argN: Any further groups to include in the comparison.

    Returns:
        The p-value produced by ``scipy.stats.f_oneway`` for the groups.

    Raises:
        ValueError: If a group is empty, or if the first element of a
            group is not an ``int`` or ``float``.
    """
    groups = [_arg1, _arg2, *_argN]
    for group in groups:
        # len() rather than truthiness so array-like inputs are handled too.
        if len(group) == 0:
            raise ValueError("each group must contain at least one value")
        # Only the leading element is inspected; this is a cheap sanity
        # check, not a full scan of every value.
        if not isinstance(group[0], (int, float)):
            raise ValueError("values must be numeric")
    _, p_value = stats.f_oneway(*groups)
    return p_value


if __name__ == "__main__":
    # Script entry point: pass anova, its model name and its description
    # to setup_utils.deploy_model.
    setup_utils.deploy_model("anova", anova,
                             "Returns the p-value from an ANOVA test")
Esempio n. 5
0
    """
    # one sample test with mean
    if len(_arg2) == 1:
        test_stat, p_value = stats.ttest_1samp(_arg1, _arg2)
        return p_value
    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    elif len(set(_arg2)) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if not (len(_arg1) == len(_arg2)):
            raise ValueError
        class1, class2 = set(_arg2)
        sample1 = []
        sample2 = []
        for i in range(len(_arg1)):
            if _arg2[i] == class1:
                sample1.append(_arg1[i])
            else:
                sample2.append(_arg1[i])
        test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value
    # arg1 is a sample and arg2 is a sample
    else:
        test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
        return p_value


if __name__ == "__main__":
    # Script entry point: pass ttest, its model name and its description
    # to setup_utils.deploy_model.
    setup_utils.deploy_model("ttest", ttest,
                             "Returns the p-value from a t-test")
Esempio n. 6
0
import scipy.stats as stats
from tabpy.models.utils import setup_utils


def anova(_arg1, _arg2, *_argN):
    '''
    Run a one-way ANOVA over two or more groups and return the p-value.

    ANOVA is a statistical hypothesis test that is used to compare
    two or more group means for equality. For more information on
    the function and how to use it please refer to tabpy-tools.md

    Args:
        _arg1: First group of values (a sized, indexable sequence).
        _arg2: Second group of values.
        *_argN: Any further groups to include in the comparison.

    Returns:
        The p-value produced by ``scipy.stats.f_oneway`` for the groups.

    Raises:
        ValueError: If a group is empty, or if the first element of a
            group is not an ``int`` or ``float``.
    '''
    groups = [_arg1, _arg2, *_argN]
    for group in groups:
        # len() rather than truthiness so array-like inputs are handled too.
        if len(group) == 0:
            raise ValueError("each group must contain at least one value")
        # Only the leading element is inspected; this is a cheap sanity
        # check, not a full scan of every value.
        if not isinstance(group[0], (int, float)):
            raise ValueError("values must be numeric")
    _, p_value = stats.f_oneway(*groups)
    return p_value


if __name__ == '__main__':
    # Script entry point: pass anova, its model name and its description
    # to setup_utils.deploy_model.
    setup_utils.deploy_model(
        'anova',
        anova,
        'Returns the p-value from an ANOVA test')
Esempio n. 7
0
    tabpy-tools.md
    '''
    if not (isinstance(_arg1[0], str)):
        raise TypeError

    supportedLibraries = {'nltk', 'textblob'}

    library = library.lower()
    if library not in supportedLibraries:
        raise ValueError

    scores = []
    if library == 'nltk':
        sid = SentimentIntensityAnalyzer()
        for text in _arg1:
            sentimentResults = sid.polarity_scores(text)
            score = sentimentResults['compound']
            scores.append(score)
    elif library == 'textblob':
        for text in _arg1:
            currScore = TextBlob(text)
            scores.append(currScore.sentiment.polarity)
    return scores


if __name__ == '__main__':
    # Script entry point: pass SentimentAnalysis, its model name and its
    # description to setup_utils.deploy_model.
    model_description = (
        'Returns a sentiment score between -1 and 1 for '
        'a given string'
    )
    setup_utils.deploy_model(
        'Sentiment Analysis', SentimentAnalysis, model_description)
Esempio n. 8
0
                      "25 unique values")
                raise ValueError
            integerEncoded = labelEncoder.fit_transform(array(col))
            integerEncoded = integerEncoded.reshape(len(col), 1)
            oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
            transformedMatrix = oneHotEncoded.transpose()
            encodedCols += list(transformedMatrix)

    dataDict = {}
    for i in range(len(encodedCols)):
        dataDict[f"col{1 + i}"] = list(encodedCols[i])

    if component <= 0 or component > len(dataDict):
        print("ERROR: Component specified must be >= 0 and "
              "<= number of arguments")
        raise ValueError

    df = pd.DataFrame(data=dataDict, dtype=float)
    scale = StandardScaler()
    scaledData = scale.fit_transform(df)

    pca = sklearnPCA()
    pcaComponents = pca.fit_transform(scaledData)

    return pcaComponents[:, component - 1].tolist()


if __name__ == "__main__":
    # Script entry point: pass PCA, its model name and its description
    # to setup_utils.deploy_model.
    model_description = "Returns the specified principal component"
    setup_utils.deploy_model("PCA", PCA, model_description)