Example #1
0
def prepare_data_with_database2(char_lst):
    """Extract feature vectors from ``char_lst`` and bucket them by label.

    Each item is passed to ``normalize`` and the result is flattened with
    ``ravel()``.  Items for which that step raises are logged through the
    module logger and skipped.

    Args:
        char_lst: iterable of objects exposing the ``id``, ``char``,
            ``is_correct`` and ``accuracy`` attributes.

    NOTE(review): the code visible here fills the lists below but contains
    no ``return`` statement, so the function returns ``None`` as written.
    The snippet looks truncated -- confirm against the full source.
    """
    positive_samples = []
    negative_samples = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []
    logger = logging.getLogger(__name__)

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # ``except Exception:`` is valid on both Python 2 and 3 and
            # avoids binding an exception variable that was never used.
            logger.error("ID: %s  %s feature_vector fetch failure!",
                         char.id, char.char)
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Only labels 1 and -1 are bucketed; any other label is kept solely
        # in the test_* lists.
        if label == 1:
            positive_samples.append([feature_vector, label, char.accuracy])
        elif label == -1:
            negative_samples.append([feature_vector, label, char.accuracy])
Example #2
0
def fetch_negative_samples(char, X=None, y=None):
    """Collect negative training samples for ``char``.

    Queries ``Character`` rows with ``is_correct=1`` whose ``char`` differs
    from the given one, walks them in pages, and appends the flattened
    ``normalize`` output of each row to ``X`` together with the label ``-1``
    in ``y``.  At most five samples are taken per distinct ``ch.char``.

    Args:
        char: the character value to exclude from the query.
        X: optional list to extend with feature vectors.  A fresh list is
            created when omitted.
        y: optional list to extend with labels.  A fresh list is created
            when omitted.

    Returns:
        The ``(X, y)`` pair.  Lists passed in by the caller are mutated in
        place and returned.
    """
    # The previous defaults (``[] * 1``) were evaluated once at definition
    # time, so samples leaked from one call into the next.
    if X is None:
        X = []
    if y is None:
        y = []

    page_size = 10000
    max_per_char = 5
    char_count_map = {}

    candidates = Character.objects.filter(Q(is_correct=1) & ~Q(char=char))
    total_count = candidates.count()
    print('negative samples: total %d' % total_count)

    # Step through every page, including the final partial one.  The earlier
    # ``(total_count - 1) / 10000`` page count skipped the last page, and
    # therefore skipped everything when there were <= 10000 rows.
    # NOTE(review): no explicit ordering is visible on this queryset, so
    # slice-based paging may not be stable between queries -- confirm.
    for start in range(0, total_count, page_size):
        for ch in candidates[start:start + page_size]:
            count = char_count_map.get(ch.char, 0)
            if count >= max_per_char:
                continue
            try:
                feature_vector = normalize(ch).ravel()
            except Exception:
                # Best effort: rows whose features cannot be extracted are
                # skipped, but KeyboardInterrupt/SystemExit still propagate.
                continue
            if feature_vector is None:
                continue
            X.append(feature_vector)
            y.append(-1)
            char_count_map[ch.char] = count + 1
    return X, y
Example #3
0
def prepare_data_with_database2(char_lst):
    """Build feature-vector lists from ``char_lst``, split by label.

    Every item goes through ``normalize`` followed by ``ravel()``.  When
    that raises, an error is written to the module logger and the item is
    skipped.

    Args:
        char_lst: iterable of objects exposing the ``id``, ``char``,
            ``is_correct`` and ``accuracy`` attributes.

    NOTE(review): as shown here the function has no ``return`` statement,
    so it returns ``None`` after filling its local lists.  The snippet is
    probably truncated -- confirm against the full source.
    """
    positive_samples = []
    negative_samples = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []
    logger = logging.getLogger(__name__)

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Portable across Python 2 and 3; the bound exception object
            # was never used, so it is no longer captured.
            logger.error("ID: %s  %s feature_vector fetch failure!",
                         char.id, char.char)
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        # Labels other than 1 / -1 are recorded in the test_* lists only.
        sample = [feature_vector, label, char.accuracy]
        if label == 1:
            positive_samples.append(sample)
        elif label == -1:
            negative_samples.append(sample)
Example #4
0
def prepare_data_with_database(char_lst):
    """Turn ``char_lst`` into training and test feature lists.

    Each item is passed to ``normalize`` and flattened with ``ravel()``.
    Items for which that raises are silently skipped.  Every remaining item
    contributes to the ``test_*`` lists; items whose label has absolute
    value 1 are additionally added to the ``prob_*`` lists.

    Args:
        char_lst: iterable of objects exposing the ``id``, ``is_correct``
            and ``accuracy`` attributes.

    Returns:
        A tuple ``(prob_y, prob_x, test_y, test_x, test_char_id_lst,
        test_accuracy_lst)`` of parallel lists.
    """
    prob_x = []
    prob_y = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # Best-effort skip.  Narrowed from a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        if abs(label) == 1:
            prob_x.append(feature_vector)
            prob_y.append(label)

    return (prob_y, prob_x, test_y, test_x, test_char_id_lst,
            test_accuracy_lst)
Example #5
0
def fetch_negative_samples(char, X=None, y=None):
    """Gather negative samples: correct characters other than ``char``.

    Pages through ``Character`` rows with ``is_correct=1`` and a ``char``
    different from the argument.  For each row, the flattened ``normalize``
    output is appended to ``X`` and the label ``-1`` to ``y``.  No more than
    five samples are collected per distinct ``ch.char``.

    Args:
        char: the character value to exclude from the query.
        X: optional list that receives the feature vectors; a new list is
            created when omitted.
        y: optional list that receives the labels; a new list is created
            when omitted.

    Returns:
        The ``(X, y)`` pair.  Caller-supplied lists are extended in place.
    """
    # ``X=[] * 1`` is still a single list created at definition time and
    # shared by every call; use None sentinels to get fresh lists.
    if X is None:
        X = []
    if y is None:
        y = []

    page_size = 10000
    max_per_char = 5
    char_count_map = {}

    queryset = Character.objects.filter(Q(is_correct=1) & ~Q(char=char))
    total_count = queryset.count()
    print('negative samples: total %d' % total_count)

    # Cover all rows, including the trailing partial page: the old
    # ``(total_count - 1) / 10000`` count never reached it, so up to 10000
    # rows (all of them, for small tables) were ignored.
    # NOTE(review): the queryset has no visible ordering, so paging by slice
    # may return overlapping or missing rows -- confirm.
    for start in range(0, total_count, page_size):
        for ch in queryset[start:start + page_size]:
            count = char_count_map.get(ch.char, 0)
            if count >= max_per_char:
                continue
            try:
                feature_vector = normalize(ch).ravel()
            except Exception:
                # Skip rows whose features cannot be extracted; interpreter
                # exit signals are no longer swallowed.
                continue
            if feature_vector is None:
                continue
            X.append(feature_vector)
            y.append(-1)
            char_count_map[ch.char] = count + 1
    return X, y
Example #6
0
def prepare_data_with_database(char_lst):
    """Collect feature vectors and labels from ``char_lst``.

    For each item, ``normalize`` is called and its result flattened with
    ``ravel()``; items for which this raises are skipped without logging.
    Every surviving item is added to the ``test_*`` lists, and those whose
    label has absolute value 1 are also added to ``prob_x`` / ``prob_y``.

    Args:
        char_lst: iterable of objects exposing the ``id``, ``is_correct``
            and ``accuracy`` attributes.

    Returns:
        A tuple ``(prob_y, prob_x, test_y, test_x, test_char_id_lst,
        test_accuracy_lst)`` of parallel lists.
    """
    prob_x = []
    prob_y = []
    test_x = []
    test_y = []
    test_char_id_lst = []
    test_accuracy_lst = []

    for char in char_lst:
        label = char.is_correct
        char_id = char.id
        try:
            feature_vector = normalize(char).ravel()
        except Exception:
            # A bare ``except:`` would also trap KeyboardInterrupt and
            # SystemExit; only ordinary errors should cause a skip.
            continue
        if feature_vector is None:
            continue

        test_x.append(feature_vector)
        test_y.append(int(label))
        test_char_id_lst.append(char_id)
        test_accuracy_lst.append(char.accuracy)

        if abs(label) == 1:
            prob_x.append(feature_vector)
            prob_y.append(label)

    return (prob_y, prob_x, test_y, test_x, test_char_id_lst, test_accuracy_lst)