def loadMetrics(filenames):
    """Assemble one DataFrame of metrics for the given text files.

    Each file is read as UTF-8 and passed, together with a shared
    ``POSTagger``, to ``countTags``; the results become the rows of a frame
    with the columns ``Pausality`` and ``Emotivity``. That frame is joined
    column-wise with whatever ``loadMetricsCSV('var/metrics_csv/')`` returns,
    and the ``Id`` column is dropped from the combined result.

    Args:
        filenames: Iterable of paths to the text files to process.

    Returns:
        pandas.DataFrame: Extracted columns followed by the CSV-provided
        columns, without ``Id``.
    """
    column_names = ['Pausality', 'Emotivity']
    # One tagger instance is shared by every file.
    pos_tagger = POSTagger(r'var/nlpnet', language='pt')

    def _measure(path):
        # Read the whole file and let countTags compute its values.
        with open(path, encoding='utf8') as handle:
            return countTags(handle.read(), pos_tagger)

    rows = [_measure(path) for path in filenames]
    extracted = pd.DataFrame(rows, columns=column_names)

    # Features that were previously saved as .csv files.
    stored = loadMetricsCSV('var/metrics_csv/')

    # Side-by-side join (axis=1) of freshly extracted and stored features.
    combined = pd.concat([extracted, stored], axis=1)
    return combined.drop('Id', axis=1)
def vectorize(text, tagger=None):
    """Build a single-row DataFrame from the ``countTags`` result for ``text``.

    Args:
        text: Text handed unchanged to ``countTags``.
        tagger: Tagger forwarded to ``countTags``. When ``None``, a
            ``POSTagger`` is loaded from ``var/nlpnet`` (language ``'pt'``)
            on every call, so callers that vectorize repeatedly should pass
            their own instance.

    Returns:
        pandas.DataFrame: One row holding the ``countTags`` result, with the
        columns listed in ``labels``.
    """
    # Identity check: ``== None`` would dispatch to any custom ``__eq__``.
    if tagger is None:
        tagger = POSTagger(r'var/nlpnet', language='pt')

    # Explicit column order (previously taken from the keys of a throwaway
    # dict). Presumably matches the order countTags emits -- TODO confirm.
    labels = [
        'ADJ', 'ADV', 'ADV-KS', 'ART', 'CUR', 'IN', 'KC', 'KS', 'N', 'NPROP',
        'NUM', 'PCP', 'PDEN', 'PREP', 'PROADJ', 'PRO-KS', 'PROPESS', 'PROSUB',
        'V', 'PU',
    ]

    freqs = countTags(text, tagger)
    return pd.DataFrame([freqs], columns=labels)
def loadPos(filenames):
    """Build a DataFrame with one row of ``countTags`` output per file.

    Args:
        filenames: Iterable of paths; each file is read in full as UTF-8.

    Returns:
        pandas.DataFrame: One row per input file, in input order, with the
        columns listed in ``labels``.
    """
    # Explicit column order (previously taken from the keys of a throwaway
    # dict, which made the order depend on dict ordering). Presumably matches
    # the order countTags emits -- TODO confirm.
    labels = [
        'ADJ', 'ADV', 'ADV-KS', 'ART', 'CUR', 'IN', 'KC', 'KS', 'N', 'NPROP',
        'NUM', 'PCP', 'PDEN', 'PREP', 'PROADJ', 'PRO-KS', 'PROPESS', 'PROSUB',
        'V', 'PU',
    ]

    # Load the nlpnet tagger once and reuse it for every file.
    tagger = POSTagger(r'var/nlpnet', language='pt')

    data = []
    for filename in filenames:
        with open(filename, encoding='utf8') as f:
            # countTags receives the raw file contents plus the shared
            # tagger; its result becomes this file's row.
            data.append(countTags(f.read(), tagger))

    return pd.DataFrame(data, columns=labels)
from django.http import JsonResponse from django.shortcuts import render from django.views.decorators.http import require_http_methods from django.views.decorators.csrf import csrf_exempt from preprocess import bow, pos from sklearn.svm import LinearSVC from sklearn.externals import joblib #supress some warnings about type conversion import warnings with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning) from nlpnet import POSTagger tagger = POSTagger(r'var/nlpnet', language='pt') vocabulary = joblib.load('var/vocabulary.pkl') print(len(vocabulary)) uni_clf = joblib.load('var/linearsvc_unigram-binary.pkl') pos_clf = joblib.load('var/linearsvc_pos.pkl') @csrf_exempt @require_http_methods(['POST']) def check(request): text = request.POST['text'] model = request.POST['model'] data = {} # vectorizing received text if (model == 'pos'):