Exemple #1
0
def check_spam(sender, data, pred=[]):
    """Button callback: classify the text in the "Input" widget and show the result.

    Args:
        sender: Callback argument; not used in the body.
        data: Callback argument; not used in the body.
        pred: History of prediction texts. The default list is created once
            and shared by every call that does not pass ``pred``, which is
            how the function remembers earlier predictions between clicks.
            Callers are expected to leave it at its default.
    """
    with window("Simple SMS Spam Filter"):
        if pred == []:
            # First click: there is no earlier prediction to hide, so only
            # lay out the spacing and the divider above the result.
            add_spacing(count=12)
            add_separator()
            add_spacing(count=12)
        else:
            # Later clicks: hide the item identified by the previous
            # prediction text before showing the new one.
            hide_item(pred[-1])

        # Read the current input, clean it and classify it.
        message = pre_process(get_value("Input"))
        verdict, colour = predict(message)

        # Remember the prediction, then display it to the user.
        pred.append(verdict)
        add_text(pred[-1], color=colour)
Exemple #2
0
def check_spam(sender, data, pred=[]):
    """Button callback: classify the text in the "Input" widget and show the result.

    Args:
        sender: Callback argument; not used in the body.
        data: Callback argument; not used in the body.
        pred: History of prediction texts. The default list is intentionally
            shared across calls (it is created once, at definition time) so
            the callback can tell the first click from later ones and can
            find the previously displayed prediction. Callers are expected
            to leave it at its default.
    """
    with window("Simple SMS Spam Filter"):
        if pred == []:
            # First click: no earlier prediction exists, so draw the spacing
            # and the separator that sit above the result.
            add_spacing(count=12)
            add_separator()  # must be called; a bare name is a no-op
            add_spacing(count=12)
        else:
            # Later clicks: hide the item identified by the previous
            # prediction text before adding the new one.
            hide_item(pred[-1])

        # Collect the current input, pre-process it and get the prediction.
        input_value = pre_process(get_value("Input"))
        pred_text, text_colour = predict(input_value)

        # Store the prediction and display it.
        pred.append(pred_text)
        add_text(pred[-1], color=text_colour)
def calc_thiessen():
    """Interactively compute a basin's mean precipitation with the Thiessen method.

    Prompts on stdin for the Hidroweb data directory, the station inventory
    (.csv), the basin shapefile (.shp), the polygon name, the attribute used
    to select the polygon, an optional buffer value, a date and an output
    directory. It then selects the stations that fall inside the polygon,
    reads their precipitation for the given date, computes the Thiessen mean
    and hands the result to ``save_csv``.

    Returns:
        None. The result is written out through ``save_csv``.

    Raises:
        ValueError: If a non-empty buffer value cannot be parsed as a float.
    """
    print("*************************************************************")
    print("* Cálculo da precipitação média usando o método de Thiessen *")
    print("*************************************************************")
    hidroweb_dir = check_dir(
        input("Informe o diretório com os dados das estações do Hidroweb:\n"),
        'dir')
    inventory = check_dir(
        input("Caminho completo para o inventário das estações:\n"),
        'file',
        ext='.csv')
    shp = check_dir(input("Shapefile com o traçado da bacia hidrográfica:\n"),
                    'file',
                    ext='.shp')
    poly = input("Nome do polígono da bacia hidrográfica:\n")
    attr = input("Nome do atributo para seleção do polígono:\n")
    buffer = input("Valor do buffer:\n")
    date = check_date()
    dir_out = check_out(input("Informe o diretório de saída:\n"))

    # Normalise the optional answers before any data is processed, so that
    # an unparsable buffer fails right away instead of after the stations
    # have been loaded.
    if not attr:
        attr = 'basiname'  # default attribute name when none is given
    # False is the "no buffer" default passed on to getvert.
    buffer = float(buffer) if buffer else False

    loc_stations = pre_process(hidroweb_dir, inventory)

    # Extract the vertices of polygon `poly` from the given shapefile.
    vertices = getvert(shp, poly, attr=attr, buffer=buffer)

    # Check which stations lie inside the polygon.
    # NOTE(review): columns 2 and 1 of loc_stations presumably hold the
    # station coordinates -- confirm against pre_process.
    isin = isinpoly3(loc_stations[:, 2], loc_stations[:, 1], vertices)

    # Keep only the stations inside the polygon.
    stations_in = loc_stations[isin, :]

    # Convert the date from a dd/mm/YYYY string to a datetime.
    date = pd.to_datetime(date, format='%d/%m/%Y')

    # Read precipitation of the selected stations for the given date.
    pr_stations = open_files(stations_in, hidroweb_dir, date)

    # Mean precipitation using the Thiessen method.
    pr_med = thiessen(pr_stations[:, 1], pr_stations[:, 0], vertices[:, 0],
                      vertices[:, 1], pr_stations[:, 2])

    # Save the mean precipitation in .csv format.
    save_csv(dir_out, pr_med, date, poly)
Exemple #4
0
#imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import functions as func  #file containing the user created functions

# Load the training and test sets into dataframes.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Main: pre-process the training data and save the resulting dataframe.
df = func.pre_process(train)
df.to_csv('dataframe.csv')

# Run feature selection on everything except the 'AnimalID' column, with
# 'OutcomeType' as the class column and the 'KBest' method (10 passed as the
# last argument).
# NOTE: the result still needs to be concatenated with the class column to
# obtain the final dataframe for classification.
# Other method names previously tried here, without the trailing argument:
# 'Fdr', 'Fwe' and 'Pct'.
selection_input = df.drop(['AnimalID'], axis=1)
features_select_K = func.feature_selection(
    selection_input, 'OutcomeType', 'KBest', 10
)

print('KBest:', features_select_K)
import sys

# Python 2 only: make UTF-8 the implicit str/unicode codec. reload(sys) is
# needed there because site.py removes sys.setdefaultencoding at startup.
# Python 3 has neither a builtin reload() nor sys.setdefaultencoding (its
# default encoding is already UTF-8), so the hack is skipped there instead
# of failing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')

from path import add_parent_to_path
# Must run before the `functions` import below.
# NOTE(review): presumably puts the parent directory on sys.path -- confirm.
add_parent_to_path()
from functions import read_corpus, replace_compounds, reddy_ncs, pre_process, print_every, read_ncs, \
    write_to_file, get_preprocess_args
import logging
from config import logging_config
import nltk

# Fetch the NLTK resources needed at runtime.
nltk.download('punkt')
nltk.download('wordnet')
if __name__ == '__main__':
    # Script entry point: read the ncs lists and the corpus, replace the
    # ncs in every pre-processed sentence and write the result out.
    logging.info('Reading train and evaluation ncs')
    args = get_preprocess_args()
    extra_ncs, _ = reddy_ncs(args.p2ec)
    ncs = read_ncs(args.p2tc)
    ncs.extend(extra_ncs)

    logging.info('Reading corpus')
    sentences = read_corpus(args.p2corp)
    lemmatizer = nltk.stem.WordNetLemmatizer()

    output = []
    logging.info('Replacing ncs in corpus')
    # Positions are 1-based, matching what print_every is given.
    for position, sentence in enumerate(sentences, 1):
        print_every(sentence, position, 10000)
        cleaned = pre_process(sentence, lemmatizer)
        output.append(replace_compounds(cleaned, ncs))

    logging.info('Writing results in ' + args.p2out)
    write_to_file(output, args.p2out)