def check_spam(sender, data, pred=[]):
    """GUI callback: classify the current "Input" text and display the result.

    Runs inside the "Simple SMS Spam Filter" window. On the first click it
    lays out the result area; on later clicks it hides the previous
    prediction widget before adding the new one.

    HACK: the mutable default ``pred`` is used *deliberately* as persistent
    state across calls — it remembers the label of the previously added
    text widget so it can be hidden next time. Do not change it to
    ``pred=None`` without adding another persistence mechanism.

    Args:
        sender: widget id that triggered the callback (unused).
        data: callback payload (unused).
        pred: persistent list of previously displayed prediction labels.
    """
    with window("Simple SMS Spam Filter"):
        if not pred:
            # First click: the result area does not exist yet, so lay it out.
            add_spacing(count=12)
            add_separator()
            add_spacing(count=12)
        else:
            # Subsequent clicks: hide the previous prediction widget first.
            hide_item(pred[-1])
        # Collect the current user input, pre-process it and classify it.
        input_value = get_value("Input")
        input_value = pre_process(input_value)
        pred_text, text_colour = predict(input_value)
        # Remember the widget label so it can be hidden on the next call.
        pred.append(pred_text)
        add_text(pred[-1], color=text_colour)
def check_spam(sender, data, pred=[]):
    """GUI callback: classify the current "Input" text and display the result.

    Duplicate (comment-stripped) variant of ``check_spam``.

    Bug fix: the original evaluated ``add_separator`` without calling it
    (a no-op expression), so the separator was never drawn; it is now
    ``add_separator()``.

    HACK: the mutable default ``pred`` is used *deliberately* as persistent
    state across calls — it remembers the previously displayed widget label
    so it can be hidden before the next prediction is shown.

    Args:
        sender: widget id that triggered the callback (unused).
        data: callback payload (unused).
        pred: persistent list of previously displayed prediction labels.
    """
    with window("Simple SMS Spam Filter"):
        if not pred:
            # First click: build the result area.
            add_spacing(count=12)
            add_separator()  # fixed: was a bare name, never invoked
            add_spacing(count=12)
        else:
            # Hide the prediction shown on the previous click.
            hide_item(pred[-1])
        # Collect, pre-process and classify the current input.
        input_value = get_value("Input")
        input_value = pre_process(input_value)
        pred_text, text_colour = predict(input_value)
        pred.append(pred_text)
        add_text(pred[-1], color=text_colour)
def calc_thiessen():
    """Interactive CLI: mean precipitation over a basin via the Thiessen method.

    Prompts the user (prompts are in Portuguese) for the Hidroweb station
    data directory, the station inventory (.csv), the basin shapefile
    (.shp), polygon/attribute names, an optional buffer, a date and an
    output directory; then computes the basin-mean precipitation with
    Thiessen polygons and saves it as a .csv file.

    Returns:
        None
    """
    print("*************************************************************")
    print("* Cálculo da precipitação média usando o método de Thiessen *")
    print("*************************************************************")
    hidroweb_dir = check_dir(
        input("Informe o diretório com os dados das estações do Hidroweb:\n"),
        'dir')
    inventory = check_dir(
        input("Caminho completo para o inventário das estações:\n"),
        'file', ext='.csv')
    shp = check_dir(input("Shapefile com o traçado da bacia hidrográfica:\n"),
                    'file', ext='.shp')
    poly = input("Nome do polígono da bacia hidrodráfica:\n")
    attr = input("Nome do atributo para seleção do polígono:\n")
    buffer = input("Valor do buffer:\n")
    date = check_date()
    dir_out = check_out(input("Informe o diretório de saída:\n"))
    loc_stations = pre_process(hidroweb_dir, inventory)
    if not attr:
        attr = 'basiname'  # default attribute name when none is supplied
    if buffer:
        buffer = float(buffer)
    else:
        buffer = False  # default when no buffer value is supplied
    # Extract the vertices of polygon `poly` from the given shapefile.
    vertices = getvert(shp, poly, attr=attr, buffer=buffer)
    # Flag which stations fall inside the polygon
    # (columns 2/1 are presumably lon/lat — TODO confirm against pre_process).
    isin = isinpoly3(loc_stations[:, 2], loc_stations[:, 1], vertices)
    # Keep only the stations inside the polygon.
    estations_in = loc_stations[isin, :]
    # Convert the date from string to datetime.
    date = pd.to_datetime(date, format='%d/%m/%Y')
    # Extract precipitation of the in-polygon stations for the given date.
    pr_estations = open_files(estations_in, hidroweb_dir, date)
    # Mean precipitation using the Thiessen method.
    pr_med = thiessen(pr_estations[:, 1], pr_estations[:, 0],
                      vertices[:, 0], vertices[:, 1], pr_estations[:, 2])
    # Save the mean precipitation in .csv format.
    save_csv(dir_out, pr_med, date, poly)
    return None
#imports import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt import functions as func #file containing the user created functions #read csv files and create dataframes train = pd.read_csv('train.csv') test = pd.read_csv('test.csv') #main #.apply allows to apply functions to each row or column df = func.pre_process(train) df.to_csv('dataframe.csv') features_select_K = func.feature_selection( df.drop(['AnimalID'], axis=1), 'OutcomeType', 'KBest', 10 ) #need to concat with class to obtain final dataframe for classification #features_select_Fdr = func.feature_selection(df.drop(['AnimalID'],axis=1),'OutcomeType','Fdr') #need to concat with class to obtain final dataframe for classification #features_select_Fwe = func.feature_selection(df.drop(['AnimalID'],axis=1),'OutcomeType','Fwe') #need to concat with class to obtain final dataframe for classification #features_select_Pct = func.feature_selection(df.drop(['AnimalID'],axis=1),'OutcomeType','Pct') #need to concat with class to obtain final dataframe for classification print('KBest:', features_select_K) #print('Fdr:',features_select_Fdr) #print('Fwe:',features_select_Fwe) #print('Fwe:',features_select_Pct) #print('KBest:',features_select_K.columns) #print('Pct:',features_select_Pct.columns)
import sys
# HACK: Python 2 only — `reload` re-exposes setdefaultencoding, which
# site.py removes at startup. Setting a global default encoding is a
# legacy workaround; it has no equivalent (and is unnecessary) on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')

from path import add_parent_to_path
add_parent_to_path()

from functions import read_corpus, replace_compounds, reddy_ncs, pre_process, print_every, read_ncs, \
    write_to_file, get_preprocess_args
import logging
from config import logging_config
import nltk

# Tokenizer and lemmatizer resources required by pre_process / WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')

if __name__ == '__main__':
    # Load the noun-compound lists: evaluation (Reddy) NCs plus training NCs.
    logging.info('Reading train and evaluation ncs')
    args = get_preprocess_args()
    r_ncs, _ = reddy_ncs(args.p2ec)
    ncs = read_ncs(args.p2tc)
    ncs.extend(r_ncs)

    logging.info('Reading corpus')
    sentences = read_corpus(args.p2corp)
    lemmatizer = nltk.stem.WordNetLemmatizer()

    output = []
    logging.info('Replacing ncs in corpus')
    # enumerate(..., start=1) replaces the range(len(...)) index loop;
    # the 1-based counter feeds the periodic progress report.
    for count, s in enumerate(sentences, start=1):
        print_every(s, count, 10000)
        # Pre-process (lemmatize) each sentence, then merge known noun
        # compounds into single tokens.
        output.append(replace_compounds(pre_process(s, lemmatizer), ncs))

    logging.info('Writing results in ' + args.p2out)
    write_to_file(output, args.p2out)