# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def prepare_data_with_database2(char_lst):
    """Sort characters into positive/negative samples and test vectors.

    For every item in ``char_lst`` the result of ``normalize(item)`` is
    flattened with ``.ravel()`` into a feature vector.  Items for which
    that fails are logged at error level and skipped.

    Args:
        char_lst: iterable of objects exposing ``id``, ``char``,
            ``is_correct`` and ``accuracy`` (presumably ``Character``
            model instances -- confirm against callers).

    NOTE(review): the collections built here are not returned by the
    visible code, so the function currently evaluates to ``None``.
    Confirm whether a ``return`` statement is missing.
    """
    positive_samples = []
    negative_samples = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []
    logger = logging.getLogger(__name__)

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Best effort: one unreadable sample must not abort the batch.
            # Lazy %-args let the logging module do the formatting, so a
            # formatting problem cannot raise from inside this handler.
            logger.error(
                "ID: %s %s feature_vector fetch failure!", char.id, char.char
            )
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Only labels of exactly 1 or -1 become training samples.
        if label == 1:
            positive_samples.append([feature_vector, label, char.accuracy])
        elif label == -1:
            negative_samples.append([feature_vector, label, char.accuracy])
# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def fetch_negative_samples(char, X=None, y=None):
    """Collect negative training samples for ``char`` from the database.

    ``Character`` rows with ``is_correct=1`` whose ``char`` differs from
    the given one are read page by page.  Each usable row contributes its
    flattened ``normalize`` output to ``X`` and the label ``-1`` to ``y``.
    At most five samples are taken per distinct ``ch.char`` value.

    Args:
        char: character value excluded from the query.
        X: optional list that feature vectors are appended to; a new list
            is created when omitted.
        y: optional list that labels are appended to; a new list is
            created when omitted.

    Returns:
        The ``(X, y)`` pair after appending.
    """
    # Create fresh lists per call.  List defaults in the signature are
    # evaluated once and would keep accumulating samples across calls.
    if X is None:
        X = []
    if y is None:
        y = []

    page_size = 10000
    max_per_char = 5
    char_count_map = {}

    # NOTE(review): pages are sliced from this queryset without an explicit
    # order_by(); confirm the model defines a default ordering so that
    # consecutive pages do not overlap.
    candidates = Character.objects.filter(Q(is_correct=1) & ~Q(char=char))
    total_count = candidates.count()
    print('negative samples: total %d' % total_count)

    # Step through every page, including the final partial one.  The old
    # page count, (total_count - 1) / 10000, always dropped the last page.
    for start in range(0, total_count, page_size):
        for ch in candidates[start:start + page_size]:
            seen = char_count_map.get(ch.char, 0)
            if seen >= max_per_char:
                continue
            try:
                feature_vector = normalize(ch).ravel()
            except Exception:
                # Best effort: skip rows whose features cannot be computed.
                continue
            if feature_vector is None:
                continue
            X.append(feature_vector)
            y.append(-1)
            char_count_map[ch.char] = seen + 1
    return X, y
# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def prepare_data_with_database2(char_lst):
    """Sort characters into positive/negative samples and test vectors.

    Each item of ``char_lst`` is passed to ``normalize`` and the result is
    flattened with ``.ravel()``.  When that raises, the failure is logged
    at error level and the item is skipped.

    Args:
        char_lst: iterable of objects exposing ``id``, ``char``,
            ``is_correct`` and ``accuracy`` (presumably ``Character``
            model instances -- confirm against callers).

    NOTE(review): nothing is returned by the visible code, so the lists
    built below are discarded and the call evaluates to ``None``.  Confirm
    whether a ``return`` statement is missing.
    """
    logger = logging.getLogger(__name__)
    positive_samples = []
    negative_samples = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Best effort: log and move on so one bad sample does not
            # abort the whole batch.  Formatting is left to the logging
            # module so it cannot raise from inside this handler.
            logger.error(
                "ID: %s %s feature_vector fetch failure!", char.id, char.char
            )
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Labels other than exactly 1 / -1 only appear in the test lists.
        if label == 1:
            positive_samples.append([feature_vector, label, char.accuracy])
        elif label == -1:
            negative_samples.append([feature_vector, label, char.accuracy])
# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def prepare_data_with_database(char_lst):
    """Build training and test collections from a list of characters.

    Each item is passed to ``normalize`` and the result is flattened with
    ``.ravel()``.  Items for which that fails are silently skipped.  Every
    remaining item goes into the test lists; items whose label has
    absolute value 1 also go into the training lists.

    Args:
        char_lst: iterable of objects exposing ``id``, ``is_correct`` and
            ``accuracy`` (presumably ``Character`` model instances --
            confirm against callers).

    Returns:
        Tuple ``(prob_y, prob_x, test_y, test_x, test_char_id_lst,
        test_accuracy_lst)``.
    """
    prob_x = []
    prob_y = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Best effort: skip unreadable samples.  Catching Exception
            # instead of everything keeps Ctrl-C and SystemExit working.
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Only labels of +1 / -1 are used as training data.
        if abs(label) == 1:
            prob_x.append(feature_vector)
            prob_y.append(label)

    return (prob_y, prob_x, test_y, test_x, test_char_id_lst,
            test_accuracy_lst)
# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def fetch_negative_samples(char, X=None, y=None):
    """Gather negative samples for ``char`` from ``Character`` rows.

    The query selects rows with ``is_correct=1`` whose ``char`` is not the
    given one and walks them in pages.  For each usable row the flattened
    ``normalize`` output is appended to ``X`` and ``-1`` to ``y``.  No more
    than five samples are kept per distinct ``ch.char`` value.

    Args:
        char: character value excluded from the query.
        X: optional list receiving feature vectors; created when omitted.
        y: optional list receiving labels; created when omitted.

    Returns:
        The ``(X, y)`` pair after appending.
    """
    # A list default in the signature is built once at definition time and
    # shared by every call, so samples would pile up across calls.
    if X is None:
        X = []
    if y is None:
        y = []

    page_size = 10000
    max_per_char = 5
    char_count_map = {}

    # NOTE(review): the queryset is paged without an explicit order_by();
    # confirm the model has a default ordering so pages do not overlap.
    candidates = Character.objects.filter(Q(is_correct=1) & ~Q(char=char))
    total_count = candidates.count()
    print('negative samples: total %d' % total_count)

    # range() with a step visits every page start, including the last
    # partial page.  The former (total_count - 1) / 10000 page count
    # skipped it, so totals of 10000 or fewer produced no samples at all.
    for start in range(0, total_count, page_size):
        for ch in candidates[start:start + page_size]:
            seen = char_count_map.get(ch.char, 0)
            if seen >= max_per_char:
                continue
            try:
                feature_vector = normalize(ch).ravel()
            except Exception:
                # Best effort: rows that cannot be featurised are skipped.
                continue
            if feature_vector is None:
                continue
            X.append(feature_vector)
            y.append(-1)
            char_count_map[ch.char] = seen + 1
    return X, y
# NOTE(review): this name is defined more than once in this file; the last
# definition is the one that takes effect at import time.
def prepare_data_with_database(char_lst):
    """Turn a list of characters into training and test collections.

    ``normalize(char).ravel()`` supplies the feature vector for each item.
    Items for which that raises are skipped without logging.  All other
    items are added to the test lists, and those labelled ``1`` or ``-1``
    are added to the training lists as well.

    Args:
        char_lst: iterable of objects exposing ``id``, ``is_correct`` and
            ``accuracy`` (presumably ``Character`` model instances --
            confirm against callers).

    Returns:
        Tuple ``(prob_y, prob_x, test_y, test_x, test_char_id_lst,
        test_accuracy_lst)``.
    """
    prob_x, prob_y = [], []
    test_x, test_y = [], []
    test_char_id_lst = []
    test_accuracy_lst = []

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Best effort, but no longer a bare ``except:`` -- interpreter
            # exits and keyboard interrupts must propagate.
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Training data is restricted to labels of +1 / -1.
        if abs(label) == 1:
            prob_x.append(feature_vector)
            prob_y.append(label)

    return (prob_y, prob_x, test_y, test_x, test_char_id_lst,
            test_accuracy_lst)