def run():
    """Train, evaluate and report the candidate and party classifiers.

    Data comes from ``load_data()``; the candidate and party tables are each
    split with ``train_test_split(..., test_size=0.2)``. For each table a
    ``Classifier`` is built from the training features/labels, and both the
    solver returned by ``_predict()`` and the ``(n, solver)`` pair returned by
    ``_pca()`` are used to classify the held-out split. Every prediction set
    is reported through ``output_results`` and summarised in a
    ``ConfusionMatrix``.

    Returns:
        dict with three entries:
            ``'data'``     -- ``(train, test)`` pairs per table,
            ``'results'``  -- ``(target, predicted)`` pairs per experiment,
            ``'matrices'`` -- one ``ConfusionMatrix`` per experiment.
    """
    started_at = time()

    # The third value returned by load_data() is not used here.
    data_cand, data_part, _full_data = load_data()

    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)

    clf_cand = Classifier(train_c.drop('candidatoId', axis=1),
                          train_c.candidatoId)
    clf_part = Classifier(train_p.drop('idPartido', axis=1),
                          train_p.idPartido)

    # Solver selection, with and without PCA (same call order as before).
    solver_cand = clf_cand._predict()
    n_cand, solver_cand_pca = clf_cand._pca()
    solver_part = clf_part._predict()
    n_part, solver_part_pca = clf_part._pca()

    target_cand = test_c.candidatoId
    target_part = test_p.idPartido

    # A fresh feature frame is built for every classify() call.
    pred_cand = clf_cand.classify(
        test_c.drop('candidatoId', axis=1), target_cand, solver_cand)
    pred_cand_pca = clf_cand.classify(
        test_c.drop('candidatoId', axis=1), target_cand, solver_cand_pca,
        n_cand)
    pred_part = clf_part.classify(
        test_p.drop('idPartido', axis=1), target_part, solver_part)
    pred_part_pca = clf_part.classify(
        test_p.drop('idPartido', axis=1), target_part, solver_part_pca,
        n_part)

    output_results(f'CANDIDATOS | {solver_cand}', target_cand, pred_cand)
    output_results(f'CANDIDATOS_PCA | {solver_cand_pca}, {n_cand}',
                   target_cand, pred_cand_pca)
    output_results(f'PARTIDOS | {solver_part}', target_part, pred_part)
    output_results(f'PARTIDOS_PCA | {solver_part_pca}, {n_part}',
                   target_part, pred_part_pca)

    # Translate the candidate-level outcome into party-level labels.
    mapped_target, mapped_pred = candidato_mapper(target_cand, pred_cand)
    output_results(f'PARTIDOS CON CANDIDATO | {solver_cand}',
                   mapped_target, mapped_pred)

    results = {
        'candidatos': (target_cand, pred_cand),
        'candidatos_pca': (target_cand, pred_cand_pca),
        'partidos': (target_part, pred_part),
        'partidos_pca': (target_part, pred_part_pca),
        'partidos_candidatos': (mapped_target, mapped_pred),
    }
    # One confusion matrix per experiment, built in the same order.
    matrices = {
        name: ConfusionMatrix(target, predicted)
        for name, (target, predicted) in results.items()
    }

    elapsed = time() - started_at
    print('----------------------------------------')
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed)}')

    return {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': results,
        'matrices': matrices,
    }
def main():
    """Parse the command line and run the requested grab/inject/match steps.

    Options:
        -h, --help             print usage information and stop
        -d, --directory DIR    directory handed to ``Grabber``
        -m, --match            call ``match_cert_key()`` on the database
        -i, --inject           create the database and insert the grabbed
                               keys and certificates (requires ``-d``)
        -b, --database NAME    database name (default: ``certs.db``)
        -o, --output           export the matches to ``matches.txt``

    ``getopt`` has no optional-argument support, so ``-o``/``--output`` is a
    plain flag and the export file name is always ``matches.txt``.
    """
    directory = ''
    match = False
    inject = False
    db_name = 'certs.db'
    output = False
    outputfile = 'matches.txt'

    try:
        # Each short option appears exactly once; long options are declared
        # without their leading dashes, as getopt requires.
        opts, _args = getopt.getopt(
            sys.argv[1:],
            'hd:mib:o',
            ['help', 'directory=', 'match', 'inject', 'database=', 'output'])
    except getopt.GetoptError as err:
        # Without a successful parse there is nothing to iterate over.
        print(err)
        usage()
        return

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print('printing hep')
            usage()
            return
        elif opt in ('-d', '--directory'):
            directory = arg
            if not directory.endswith('/'):
                directory += '/'
        elif opt in ('-m', '--match'):
            match = True
        elif opt in ('-i', '--inject'):
            inject = True
        elif opt in ('-b', '--database'):
            # Must update db_name: that is what Database() receives below.
            db_name = arg
        elif opt in ('-o', '--output'):
            output = True

    certs = keys = None
    if directory:
        print("Grabbing data...")
        grabber = Grabber(directory)
        classifier = Classifier(grabber)
        print("Classifying...")
        classifier.classify()
        certs, keys = classifier.get_data()

    if not (directory or inject or match or output):
        # Nothing was requested; do not touch the database.
        return

    db = Database(db_name)

    if inject:
        if keys is None:
            # Injection needs the data produced by the grab/classify step.
            print("Nothing to inject: no directory was given.")
        else:
            try:
                print("Creating the database")
                db.create_db()
            except Exception:
                # Best effort: an existing database is reused as-is.
                print("Database already exists.")
            print("Injecting data into the database...")
            db.insert_keys(keys)
            db.insert_certs(certs)

    if match:
        print("Matching data...")
        db.match_cert_key()

    if output:
        db.export_matches(outputfile)
def parte_a(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    """Train an ID3-backed classifier on ``train`` and report it on ``test``.

    Args:
        train: training data passed to ``ID3`` together with
            ``numeric_attributes``.
        test: table iterated row by row with ``iterrows()``; every row must
            expose its true label as ``clazz``.
            NOTE(review): assumed to be a pandas DataFrame -- confirm.
        numeric_attributes: forwarded unchanged to ``ID3``.

    The true and predicted labels are handed to ``output_results`` under the
    title ``'PARTE A'``; nothing is returned.
    """
    classifier = Classifier(ID3(train, numeric_attributes))
    classifier.train()

    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        # Each row is a Series, and ``Series.drop(columns=...)`` is a no-op,
        # so the label has to be removed by index label instead.
        features = elem.drop(labels=['clazz'])
        predicted.append(classifier.classify(features))

    output_results(title='PARTE A', actual=actual, predicted=predicted)
class Chatbot:
    """Intent-driven chatbot that can also look up a tracking code.

    Incoming text is classified into a one-letter intent and answered with a
    random line from the matching response set. After a tracking intent
    (``'T'``) the bot waits for a tracking code, retrying up to
    ``MAX_ATTEMPTS`` times before giving up.

    Response lines come from ``readlines()`` and therefore keep their
    trailing newline.
    """

    # Regular expressions for parsing responses
    RE_CODE = re.compile(r'.*(\w\w\d{9}\w\w)')
    RE_SENT = re.compile(r'.*postado.*')
    RE_FORWARDED = re.compile(r'.*encaminhado.*')
    RE_DELIVERING = re.compile(r'.*saiu.*')
    RE_ARRIVED = re.compile(r'.*entregue.*')

    # Retries when waiting for an action
    MAX_ATTEMPTS = 2

    def __init__(self):
        """Load the classifier, the tracker and every response set."""
        # Classifier
        self.c = Classifier()
        self.c.load_classifier(TRAINING_SET)
        self.code = ''

        # Correios tracker
        self.t = Tracker()

        # Answers
        self.greetings_responses = self._read_lines(GREETINGS_SET)
        self.complain_responses = self._read_lines(COMPLAIN_SET)
        self.help_responses = self._read_lines(HELP_SET)
        self.unknown_responses = self._read_lines(UNKNOWN_SET)
        self.wrong_code_responses = self._read_lines(WRONG_CODE_SET)
        self.tracking_responses = self._read_lines(TRACKING_SET)
        self.quit_responses = self._read_lines(QUIT_SET)
        self.goodbye_responses = self._read_lines(GOODBYE_SET)

        # The results file holds one template per line, in this fixed order.
        results_responses = self._read_lines(RESULTS_SET)
        self.offline_response = results_responses[0]
        self.fail_response = results_responses[1]
        self.sent_response = results_responses[2]
        self.forwarded_response = results_responses[3]
        self.delivering_response = results_responses[4]
        self.arrived_response = results_responses[5]

        # If it is expecting something
        self.waiting_for = ''
        self.attempts = 0

    @staticmethod
    def _read_lines(path):
        """Return every line of the file at ``path`` (newlines kept)."""
        with open(path) as f:
            return f.readlines()

    def get_intent(self, text):
        """Return the first element of the classifier's result for ``text``."""
        return self.c.classify(text)[0]

    def random_response(self, responses_set):
        """Return one randomly chosen entry of ``responses_set``."""
        return random.choice(responses_set)

    def format_responses(self, code):
        """Return the answer describing the latest tracking event of ``code``.

        The tracker result is a mapping whose ``'return'`` entry flags a
        failed request (``'request_failed'``) or a failed lookup
        (``'failure'``); otherwise its ``'status'`` text selects the template
        to fill in. An empty string is returned when no template applies.
        """
        info = self.t.track_latest(code)

        if info['return'] == 'request_failed':
            return self.offline_response
        if info['return'] == 'failure':
            return self.fail_response

        status = info['status']
        if self.RE_SENT.match(status):
            return self.sent_response.format(code, info['when'])
        if self.RE_FORWARDED.match(status):
            return self.forwarded_response.format(info['from'], info['to'])
        if self.RE_DELIVERING.match(status):
            return self.delivering_response.format(info['where'])
        if self.RE_ARRIVED.match(status):
            return self.arrived_response
        return ''

    def code_responses(self, text):
        """Answer ``text`` while the bot is waiting for a tracking code.

        When a code is found, the waiting state is cleared and the tracking
        answer is returned. Otherwise a "wrong code" answer is returned
        until ``MAX_ATTEMPTS`` retries have been used, after which the bot
        stops waiting and returns a "quit" answer.
        """
        found = self.RE_CODE.match(text)
        if found:
            self.attempts = 0
            self.waiting_for = ''
            # group(1) is the captured code; group(0) would also include
            # whatever text precedes it.
            return self.format_responses(found.group(1))

        if self.attempts == self.MAX_ATTEMPTS:
            # Quit after MAX_ATTEMPTS
            self.attempts = 0
            self.waiting_for = ''
            return self.random_response(self.quit_responses)

        # Else, retry
        self.attempts += 1
        return self.random_response(self.wrong_code_responses)

    def get_response(self, text):
        """Return the bot's reply to ``text``."""
        # Test for tracking code
        if self.waiting_for == 'code':
            return self.code_responses(text)

        # Else, get responses to chat
        intent = self.get_intent(text)
        if intent == 'T':
            self.waiting_for = 'code'

        responses_by_intent = {
            'G': self.greetings_responses,
            'H': self.help_responses,
            'T': self.tracking_responses,
            'C': self.complain_responses,
            'U': self.unknown_responses,
            'B': self.goodbye_responses,
        }
        # Unrecognised intents fall back to the "unknown" response set.
        responses = responses_by_intent.get(intent, self.unknown_responses)
        return self.random_response(responses)