def main(): global POPULATION_COUNT, ITERATIONS, MAX_FUNC_DEPTH file = open('data.txt', 'w') if len(sys.argv) == 3: ITERATIONS = int(sys.argv[1]) if len(sys.argv) == 3: POPULATION_COUNT = int(sys.argv[2]) f, data = create_test_data() write_init_data(file, f, data) population = create_population(POPULATION_COUNT, MAX_FUNC_DEPTH) normalizator = Normalizator(data, mistake) functions = FunctoionContainer(choose_parent_pairs2, choose_to_kill, create_children) options = Options(0.4) mutator = Mutator(population, normalizator, options, functions) print 'population: ' #for s in population: print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f for i in range(ITERATIONS): print 'population ' + str(i) + ' : ' for j in range(5): s = population[(j + 1) * (-1)] print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f if j == 1: # ITERATION_NUMBER:MISTAKE:FUNCTION write_population_data(file, i, s, mistake(s.f, data)) mutator.make_children() mutator.kill_unused() # mutator.change_random() mutator.change_age() mutator.mutate() normalizator.compute_distribuante(population) population.sort() file.close()
def main(): global POPULATION_COUNT, ITERATIONS, MAX_FUNC_DEPTH file = open('data.txt', 'w') if len(sys.argv) == 3: ITERATIONS = int(sys.argv[1]) if len(sys.argv) == 3: POPULATION_COUNT = int(sys.argv[2]) f, data = create_test_data() write_init_data(file, f, data) population = create_population(POPULATION_COUNT, MAX_FUNC_DEPTH) normalizator = Normalizator(data, mistake) functions = FunctoionContainer(choose_parent_pairs2, choose_to_kill, create_children) options = Options(0.4) mutator = Mutator(population, normalizator, options, functions) print 'population: ' #for s in population: print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f for i in range(ITERATIONS): print 'population ' + str(i) + ' : ' for j in range(5): s = population[(j+1)*(-1)] print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f if j == 1: # ITERATION_NUMBER:MISTAKE:FUNCTION write_population_data(file, i, s, mistake(s.f, data)) mutator.make_children() mutator.kill_unused() # mutator.change_random() mutator.change_age() mutator.mutate() normalizator.compute_distribuante(population) population.sort() file.close()
def saveTweetsOf(screen_name, category): for i in range(1,4): statuses = api.GetSearch(term=settings_local.USERTRACK, lang='es', page=i,per_page=100) for s in statuses: if(s.text.find('RT',0,2)==-1): screen_name = s._user._screen_name t = Normalizator.normalize(s.text) em = Normalizator.usedPattern(Patterns.DUPLICATED_LETTERS) print Normalizator.usedPattern(Patterns.SPECIALCHARS_AND_MENTIONS) print smart_str(s.text.lower()) if (not(t.isspace())): selected = raw_input('Es un reclamo?') tweets = db[settings_local.CATEGORIES[int(selected)]] tweet = {'normalized_tweet': t, 'tweet': s.text, 'screen_name': screen_name, 'emphatized': em } tweets.insert(tweet)
def testTweets(): for i in range(1, 8): statuses = api.GetSearch(term=settings_local.USERTRACK, lang="es", page=i, per_page=100) for s in statuses: t = Normalizator.normalize(s.text) if not (t.isspace()): if classifier.classify(extract_features(t.split())) == "reclamo": print "\033[91m" + t else: print "\033[94m" + t
import etc
from normalizator import Normalizator

normalizator = Normalizator()

# Keep only id + attributes for each candidate record.
candidates_attr = list(map(lambda data: {'id': data['id'],
                                         'attributes': data['attributes']},
                           etc.candidates))
candidates_attr_normalized = list()


def get_spesific_values(a_list, key):
    """Return attribute ``key`` of every candidate dict in ``a_list``."""
    # (Name spelling preserved for compatibility with any external callers.)
    return [d['attributes'][key] for d in a_list]


# Hoist the per-attribute value lists out of the loop: they depend only on
# the full candidate list, so recomputing them for every candidate (as the
# original did) was accidentally O(n^2).
_all_salaries = get_spesific_values(candidates_attr, 'salary')
_all_gpas = get_spesific_values(candidates_attr, 'gpa')
_all_dependants = get_spesific_values(candidates_attr, 'dependant')

# Build [id, normalized salary, normalized gpa, normalized dependant]
# rows, iterating the candidates directly instead of by index.
for candidate in candidates_attr:
    attrs = candidate['attributes']
    candidates_attr_normalized.append([
        candidate['id'],
        normalizator.normalize_salary(_all_salaries, attrs['salary']),
        normalizator.normalize_gpa(_all_gpas, attrs['gpa']),
        normalizator.normalize_dependant(_all_dependants, attrs['dependant']),
    ])

# Create a simple weight
for s in statuses: t = Normalizator.normalize(s.text) if not (t.isspace()): if classifier.classify(extract_features(t.split())) == "reclamo": print "\033[91m" + t else: print "\033[94m" + t allTweets = [] for c in settings_local.CATEGORIES: statuses = db[c].find() for s in statuses: t = s["normalized_tweet"] t = Normalizator.normalize(t) if not (t.isspace()): allTweets.append((t, c)) tweets = [] for (words, sentiment) in allTweets: words_filtered = [e.lower() for e in words.split() if len(e) >= 3] tweets.append((words_filtered, sentiment)) def get_words_in_tweets(tweets): all_words = [] for (words, sentiment) in tweets: all_words.extend(words) return all_words