if not (isinstance(_arg1[0], str)): raise TypeError supportedLibraries = {"nltk", "textblob"} library = library.lower() if library not in supportedLibraries: raise ValueError scores = [] if library == "nltk": sid = SentimentIntensityAnalyzer() for text in _arg1: sentimentResults = sid.polarity_scores(text) score = sentimentResults["compound"] scores.append(score) elif library == "textblob": for text in _arg1: currScore = TextBlob(text) scores.append(currScore.sentiment.polarity) return scores if __name__ == "__main__": setup_utils.deploy_model( "Sentiment Analysis", SentimentAnalysis, "Returns a sentiment score between -1 and 1 for " "a given string", )
''' # one sample test with mean if len(_arg2) == 1: test_stat, p_value = stats.ttest_1samp(_arg1, _arg2) return p_value # two sample t-test where _arg1 is numeric and _arg2 is a binary factor elif len(set(_arg2)) == 2: # each sample in _arg1 needs to have a corresponding classification # in _arg2 if not (len(_arg1) == len(_arg2)): raise ValueError class1, class2 = set(_arg2) sample1 = [] sample2 = [] for i in range(len(_arg1)): if _arg2[i] == class1: sample1.append(_arg1[i]) else: sample2.append(_arg1[i]) test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False) return p_value # arg1 is a sample and arg2 is a sample else: test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False) return p_value if __name__ == '__main__': setup_utils.deploy_model('ttest', ttest, 'Returns the p-value form a t-test')
'25 unique values') raise ValueError integerEncoded = labelEncoder.fit_transform(array(col)) integerEncoded = integerEncoded.reshape(len(col), 1) oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded) transformedMatrix = oneHotEncoded.transpose() encodedCols += list(transformedMatrix) dataDict = {} for i in range(len(encodedCols)): dataDict[f'col{1 + i}'] = list(encodedCols[i]) if component <= 0 or component > len(dataDict): print('ERROR: Component specified must be >= 0 and ' '<= number of arguments') raise ValueError df = pd.DataFrame(data=dataDict, dtype=float) scale = StandardScaler() scaledData = scale.fit_transform(df) pca = sklearnPCA() pcaComponents = pca.fit_transform(scaledData) return pcaComponents[:, component - 1].tolist() if __name__ == '__main__': setup_utils.deploy_model('PCA', PCA, 'Returns the specified principal component')
import scipy.stats as stats
from tabpy.models.utils import setup_utils


def anova(_arg1, _arg2, *_argN):
    """
    One-way ANOVA: compare two or more group means for equality.

    Parameters
    ----------
    _arg1, _arg2 : sequence of int or float
        The first two sample groups (required).
    *_argN : sequence of int or float
        Any number of additional sample groups.

    Returns
    -------
    float
        The p-value from scipy.stats.f_oneway.

    Raises
    ------
    ValueError
        If any group is empty or its values are not numeric.

    For more information on the function and how to use it please
    refer to tabpy-tools.md
    """
    cols = [_arg1, _arg2] + list(_argN)
    for col in cols:
        # Guard empty groups too: the original col[0] probe would raise
        # an opaque IndexError on an empty sequence.
        if not col or not isinstance(col[0], (int, float)):
            raise ValueError("values must be numeric")
    _, p_value = stats.f_oneway(_arg1, _arg2, *_argN)
    return p_value


if __name__ == "__main__":
    # Typo fixed in the deployed description: "form" -> "from".
    setup_utils.deploy_model("anova", anova, "Returns the p-value from an ANOVA test")
""" # one sample test with mean if len(_arg2) == 1: test_stat, p_value = stats.ttest_1samp(_arg1, _arg2) return p_value # two sample t-test where _arg1 is numeric and _arg2 is a binary factor elif len(set(_arg2)) == 2: # each sample in _arg1 needs to have a corresponding classification # in _arg2 if not (len(_arg1) == len(_arg2)): raise ValueError class1, class2 = set(_arg2) sample1 = [] sample2 = [] for i in range(len(_arg1)): if _arg2[i] == class1: sample1.append(_arg1[i]) else: sample2.append(_arg1[i]) test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False) return p_value # arg1 is a sample and arg2 is a sample else: test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False) return p_value if __name__ == "__main__": setup_utils.deploy_model("ttest", ttest, "Returns the p-value form a t-test")
import scipy.stats as stats
from tabpy.models.utils import setup_utils


def anova(_arg1, _arg2, *_argN):
    '''
    One-way ANOVA: compare two or more group means for equality.

    Parameters
    ----------
    _arg1, _arg2 : sequence of int or float
        The first two sample groups (required).
    *_argN : sequence of int or float
        Any number of additional sample groups.

    Returns
    -------
    float
        The p-value from scipy.stats.f_oneway.

    Raises
    ------
    ValueError
        If any group is empty or its values are not numeric.

    For more information on the function and how to use it please
    refer to tabpy-tools.md
    '''
    cols = [_arg1, _arg2] + list(_argN)
    for col in cols:
        # Guard empty groups too: the original col[0] probe would raise
        # an opaque IndexError on an empty sequence.
        if not col or not isinstance(col[0], (int, float)):
            raise ValueError('values must be numeric')
    _, p_value = stats.f_oneway(_arg1, _arg2, *_argN)
    return p_value


if __name__ == '__main__':
    # Typo fixed in the deployed description: "form" -> "from".
    setup_utils.deploy_model(
        'anova', anova, 'Returns the p-value from an ANOVA test')
tabpy-tools.md ''' if not (isinstance(_arg1[0], str)): raise TypeError supportedLibraries = {'nltk', 'textblob'} library = library.lower() if library not in supportedLibraries: raise ValueError scores = [] if library == 'nltk': sid = SentimentIntensityAnalyzer() for text in _arg1: sentimentResults = sid.polarity_scores(text) score = sentimentResults['compound'] scores.append(score) elif library == 'textblob': for text in _arg1: currScore = TextBlob(text) scores.append(currScore.sentiment.polarity) return scores if __name__ == '__main__': setup_utils.deploy_model( 'Sentiment Analysis', SentimentAnalysis, 'Returns a sentiment score between -1 and 1 for ' 'a given string')
"25 unique values") raise ValueError integerEncoded = labelEncoder.fit_transform(array(col)) integerEncoded = integerEncoded.reshape(len(col), 1) oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded) transformedMatrix = oneHotEncoded.transpose() encodedCols += list(transformedMatrix) dataDict = {} for i in range(len(encodedCols)): dataDict[f"col{1 + i}"] = list(encodedCols[i]) if component <= 0 or component > len(dataDict): print("ERROR: Component specified must be >= 0 and " "<= number of arguments") raise ValueError df = pd.DataFrame(data=dataDict, dtype=float) scale = StandardScaler() scaledData = scale.fit_transform(df) pca = sklearnPCA() pcaComponents = pca.fit_transform(scaledData) return pcaComponents[:, component - 1].tolist() if __name__ == "__main__": setup_utils.deploy_model("PCA", PCA, "Returns the specified principal component")