Example 1
def main():
    import codecs
    import sys
    import itertools
    import math
    args = parse_args()
    ilines = [util.preprocess(x, args.lang) for x in codecs.open(args.input, 'r', 'utf-8').readlines()]
    rlines = [util.preprocess(x, args.lang) for x in codecs.open(args.ref, 'r', 'utf-8').readlines()]
    if len(ilines) != len(rlines):
        print("Error: input file has {0} lines, but reference has {1} lines.".format(len(ilines), len(rlines)))
        sys.exit(1)
    scores = []
    falign = open(args.align, 'w') if args.align is not None else None
    for lineno, (iline, rline) in enumerate(zip(ilines, rlines), start=1):
        if args.force_token_mode:
            rline, iline = rline.split(), iline.split()
        else:
            rline, iline = util.split(rline, args.lang), util.split(iline, args.lang)
        # iline and rline are now lists of tokens
        score, alignment = ter(iline, rline, align=True)
        if args.align is not None:
            falign.write('%s\n' % ' '.join(alignment))
        scores.append(score)
        if args.verbose:
            print("Sentence {0}: {1:.4f}".format(lineno, score))
    if args.align is not None:
        falign.close()
    average = sum(scores) / len(scores)
    variance = sum((x - average) ** 2 for x in scores) / len(scores)
    stddev = math.sqrt(variance)
    print("Average={0:.4f}, Variance={1:.4f}, Standard Deviation={2:.4f}".format(average, variance, stddev))
    def __init__(self, data_dir, data_name, train_ratio, device):
        self.train_ratio = train_ratio
        self.num_negatives = 3
        self.device = device

        if data_name == 'ml-100k':
            sep = '\t'
            filename = 'u.data'
            self.num_users, self.num_items = 943, 1682
        elif data_name == 'ml-1m':
            sep = '::'
            filename = 'ratings.dat'
            self.num_users, self.num_items = 6040, 3952
        else:
            raise NotImplementedError(
                'Dataset not loaded. Available datasets: {ml-100k, ml-1m}')

        data_path = os.path.join(data_dir, data_name, data_name + '.data')
        stat_path = os.path.join(data_dir, data_name, data_name + '.stat')

        if os.path.exists(data_path) and os.path.exists(stat_path):
            print('Already preprocessed. Load from file.')
        else:
            preprocess(os.path.join(data_dir, data_name, filename), data_path,
                       stat_path, sep)

        print('Read movielens data from %s' % data_path)
        (self.train_matrix, self.test_matrix, self.user_id_map, self.user_popularity,
         self.item_id_map, self.item_popularity, self.num_users, self.num_items) = load_data(data_path)
Example 3
def main():
    import sys
    import codecs
    args = parse_args()
    hlines = [
        util.preprocess(x, "en")
        for x in codecs.open(args.hypothesis, 'r', 'utf-8').readlines()
    ]
    rlines = [
        util.preprocess(x, "en")
        for x in codecs.open(args.reference, 'r', 'utf-8').readlines()
    ]
    if len(hlines) != len(rlines):
        print("Error: input file has {0} lines, but reference has {1} lines.".
              format(len(hlines), len(rlines)))
        sys.exit(1)
    scores = []
    for lineno, (hline, rline) in enumerate(zip(hlines, rlines), start=1):
        rline, hline = list(rline), list(hline)
        score = eed(hline, rline)
        scores.append(score)
        if args.verbose:
            print("Sentence {0}: {1:.4f}".format(lineno, score))

    average = sum(scores) / len(scores)

    print("System Score={0:.4f}".format(average))
    sys.exit(0)
Example 4
def main():

    ifHash = False

    trainfile = 'yelp_reviews_train.json'
    X, y, top = util.preprocess(trainfile,
                                ifTrain=True,
                                ifHash=ifHash,
                                trainTop=[])

    W = multiLR.BSGD(X, y)
    t, s = multiLR.predict(W, X)
    print(eval.eval(t, s, y))

    predfile = 'yelp_reviews_dev.json'
    x, _, _ = util.preprocess(predfile,
                              ifTrain=False,
                              ifHash=ifHash,
                              trainTop=top)

    t, s = multiLR.predict(W, x)

    util.writePred(t, s, 'v7.txt')

    return
Example 5
def get_id(file_path, input_len=400, target_len=100, max_oov=400):
    with open(file_path, encoding='utf8') as f:
        text = f.read()
    text = text.split('\n\n')

    word_list = []
    art, summ = [], []
    for t in text:
        temp = t.split(':==:')
        art.append(util.preprocess(temp[0]))
        summ.append(util.preprocess(temp[1]))
        word_list += util.preprocess(temp[0]).split()
        word_list += util.preprocess(temp[1]).split()
    del text, t, temp

    word_list = list(set(word_list))
    oov2idx, idx2oov = vocab.create_oov_list(word_list, max_oov)

    art_max, sum_max = 0, 0
    for ind, k in enumerate(art):
        if len(k.split()) > art_max:
            art_max = len(k.split())
        if len(summ[ind].split()) > sum_max:
            sum_max = len(summ[ind].split())
    if art_max > input_len:
        art_max = input_len
    if sum_max > target_len:
        sum_max = target_len

    temp = []
    for index in range(8):
        lst = art[index].split()[:art_max - 2]
        lst = vocab.word_list_to_idx_list(lst, oov2idx)
        lst.insert(0, vocab.w2i['<SOS>'])
        lst.insert(len(lst), vocab.w2i['<EOS>'])
        diff = 0
        if len(lst) < art_max:
            diff = art_max - len(lst)
            pad = [vocab.w2i['<PAD>']] * diff
            lst = lst + pad
        temp.append(lst)
    inp = np.array(temp).astype(int)

    temp = []
    for index in range(8):
        lst = summ[index].split()[:sum_max - 1]
        lst = vocab.word_list_to_idx_list(lst, oov2idx)
        lst.insert(len(lst), vocab.w2i['<EOS>'])
        diff = 0
        if len(lst) < sum_max:
            diff = sum_max - len(lst)
            pad = [vocab.w2i['<PAD>']] * diff
            lst = lst + pad
        temp.append(lst)
    tar = np.array(temp).astype(int)
    return (inp, tar, idx2oov)
Example 6
def score(hypIn, refIn):
    import codecs
    hyp = [util.preprocess(x) for x in open(hypIn, mode='rt', encoding='utf-8').readlines()]
    ref = [util.preprocess(x) for x in open(refIn, mode='rt', encoding='utf-8').readlines()]
    assert len(hyp) == len(ref)
    scores = []
    for (h, r) in zip(hyp, ref):
        # h, r = list(h), list(r)
        score = eed(h, r)
        scores.append(score)
    return sum(scores) / len(scores)
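A call like the following (with hypothetical file names, one sentence per line) would print the corpus-level score computed by this function:

if __name__ == '__main__':
    # hypothetical hypothesis/reference files
    print("System Score={0:.4f}".format(score('hyp.txt', 'ref.txt')))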
Example 7
def generate():
    ifHash = True
    trainfile = 'yelp_reviews_train.json'
    X, y, top = util.preprocess(trainfile, ifTrain=True, ifHash=ifHash, trainTop=[])

    predfile = 'yelp_reviews_dev.json'
    x, _, _ = util.preprocess(predfile, ifTrain=False, ifHash=ifHash, trainTop=top)

    process(X, y, 'libtrainHash.txt')
    process(x, np.zeros(x.shape[0]), 'libdevHash.txt')
    return
Example 8
    def __getitem__(self, i):
        i = random.randint(0, len(self.raws) - 1)
        raw, blur = self.raws[i], self.blurs[i]

        if self.noise:
            raw, blur, blur_noise = preprocess([raw, blur], self.patchsize,
                                               self.noise)

            return {'A': raw, 'B': blur, 'B_n': blur_noise}
        else:
            raw, blur = preprocess([raw, blur], self.patchsize, self.noise)

            return {'A': raw, 'B': blur}
Example 9
    def __getitem__(self, i):
        idx = random.randint(0, len(self.raw_Ss) - 1)
        raw_S = self.raw_Ss[idx]
        blur_S = self.blur_Ss[idx]
        if self.noise:
            raw, blur, blur_noise = preprocess([raw_S, blur_S], self.patchsize,
                                               self.noise)

            return {'A': raw, 'B': blur, 'B_n': blur_noise}
        else:
            raw, blur = preprocess([raw_S, blur_S], self.patchsize, self.noise)

            return {'A': raw, 'B': blur}
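Examples 8 and 9 both call a preprocess helper that crops aligned patches from a raw/blurred image pair and, when noise is enabled, also returns a noisy copy of the blurred patch. A rough sketch of that contract, assuming numpy images and an additive Gaussian noise model (not the project's actual code):

import random
import numpy as np

def preprocess(imgs, patchsize, noise):
    # crop the same random patch from both images so the pair stays aligned
    raw, blur = imgs
    h, w = raw.shape[:2]
    top = random.randint(0, h - patchsize)
    left = random.randint(0, w - patchsize)
    raw = raw[top:top + patchsize, left:left + patchsize]
    blur = blur[top:top + patchsize, left:left + patchsize]
    if noise:
        # assumed: additive Gaussian noise on the blurred patch
        blur_noise = blur.astype(np.float32) + np.random.normal(0.0, 5.0, blur.shape)
        return raw, blur, blur_noise
    return raw, blur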
Example 10
    def set_data(self, X_train, labels_train):
        """ Store shuffled data in instance variable self.data,
            and make distinction between training and validation sets."""
        self.data                = self._get_shuffled_data_dict(X_train, labels_train)
        n_train = int(50e3)
        self.data['X_val']       = util.preprocess(self.data['X_train'][n_train:])
        self.data['X_train']     = util.preprocess(self.data['X_train'][:n_train])
        self.data['labels_val']  = self.data['labels_train'][n_train:]
        self.data['labels_train']= self.data['labels_train'][:n_train]

        # The 'active' data will later correspond with the minibatch being used.
        self.X_active       = self.data['X_train']
        self.n_data_active  = self.X_active.shape[0]
        self.labels_active  = self.data['labels_train']
Example 11
    def fit(self, args, train_data, dev_data):
        x_dev_batch, y_dev_batch = util.preprocess(dev_data)
        with tf.Session(graph=self.graph) as sess:
            sw = tf.train.SummaryWriter(self.result_folder, sess.graph)

            print("Init models")
            sess.run(tf.initialize_all_variables())

            for i in range(args.num_epochs):
                train_iterator = util.ptb_iterator(train_data, args.batch_size)
                for x_batch, y_batch in train_iterator:
                    _, train_summaries, total_loss, current_step = self.train_step(
                        sess, x_batch, y_batch)
                    sw.add_summary(train_summaries, current_step)

                    if current_step % args.eval_freq == 0:
                        acc, dev_summaries = self.dev_step(
                            sess, x_dev_batch, y_dev_batch)
                        sw.add_summary(dev_summaries, current_step)

                    if current_step % args.save_freq == 0:
                        self.saver.save(sess,
                                        self.result_folder + '/bee.chkp',
                                        global_step=current_step)
                epoch_acc, dev_summaries = self.dev_step(
                    sess, x_dev_batch, y_dev_batch)
                print('Epoch: %d, Accuracy: %f' % (i + 1, epoch_acc))

            self.saver.save(sess, self.result_folder + '/bee.chkp')

            with open(self.result_folder + '/bee_saver_def.pb', 'w') as f:
                f.write(str(self.saver.as_saver_def()))
Example 12
def main():
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)

    vocab_size = len(word_to_id)
    contexts, target = create_context_target(corpus, window_size)
    one_hot_target = convert_one_hot(target, vocab_size)
    one_hot_contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    trainer.fit(one_hot_contexts, one_hot_target, max_epoch, batch_size)
    # trainer.plot()

    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])
    print('DONE')
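The preprocess used here builds an integer-encoded corpus plus the word/id mappings that the rest of Example 12 relies on; a minimal sketch of such a helper (an assumption, not necessarily the original implementation):

import numpy as np

def preprocess(text):
    # lower-case the toy corpus and treat the final period as its own token
    words = text.lower().replace('.', ' .').split()
    word_to_id, id_to_word = {}, {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word
    corpus = np.array([word_to_id[w] for w in words])
    return corpus, word_to_id, id_to_word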
Example 13
def get_style_feature(input_style_image, device, style_name):
    # writer = tf.summary.FileWriter(P.st_logs + style_name + "_style_feature")
    with tf.Graph().as_default(), tf.device(device), tf.Session() as sess:
        style_image = tf.placeholder(dtype=tf.float32,
                                     shape=input_style_image.shape(),
                                     name='style_image')
        style_image_pre = util.preprocess(style_image)
        net = VGG.net(P.st_vgg_path,
                      style_image_pre,
                      layer_name='style_feature')
        style_features = dict()
        style_layer = dict(filter(lambda x: x[0] in STYLE_LAYERS, net.items()))
        merge = tf.summary.merge_all()
        # style_layer['summary'] = merge
        result = sess.run(style_layer, {style_image: input_style_image.image})
        for layer in STYLE_LAYERS:
            features = result[layer]
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram
    # writer.add_graph(sess.graph)
    # writer.add_summary(result['summary'])
    # writer.flush()
    # writer.close()
    return style_features
Example 14
def score(hypIn, refIn):
    import codecs
    hyp = [
        util.preprocess(x, "en")
        for x in codecs.open(hypIn, 'r', 'utf-8').readlines()
    ]
    ref = [
        util.preprocess(x, "en")
        for x in codecs.open(refIn, 'r', 'utf-8').readlines()
    ]
    scores = []
    for (h, r) in zip(hyp, ref):
        h, r = list(h), list(r)
        score = eed(h, r)
        scores.append(score)
    return sum(scores) / len(scores)
Example 15
def plot_CDF(filename, start_selector, end_selector, start_index=None, end_index=None, **kwargs):
    records = read_records(filename)
    lineages = preprocess(records, cmd_of_interest='', send_only=False)
    intervals = get_intervals(lineages, start_selector, end_selector, start_index=start_index, end_index=end_index, **kwargs)

    sortedtime = np.sort(list(intervals.values()))
    p = 1. * np.arange(len(intervals)) / (len(intervals) - 1)
    plt.plot(sortedtime, p, **kwargs)
Example 16
    def _file_reader(self, filename_queue):
        # read file from queue
        reader = tf.WholeFileReader()
        _, img_bytes = reader.read(filename_queue)
        # decode it
        image_data = tf.image.decode_jpeg(img_bytes, channels=3)
        # preprocess it and return
        return preprocess(image_data, self.config)
Example 17
def generate():
    ifHash = True
    trainfile = 'yelp_reviews_train.json'
    X, y, top = util.preprocess(trainfile,
                                ifTrain=True,
                                ifHash=ifHash,
                                trainTop=[])

    predfile = 'yelp_reviews_dev.json'
    x, _, _ = util.preprocess(predfile,
                              ifTrain=False,
                              ifHash=ifHash,
                              trainTop=top)

    process(X, y, 'libtrainHash.txt')
    process(x, np.zeros(x.shape[0]), 'libdevHash.txt')
    return
Example 18
def get_rnn_data(N_rows, bucket_size):
    parse_dates = [['Date', 'Time']]
    filename = "household_power_consumption.txt"
    df = preprocess(N_rows, parse_dates, filename)
    df = pd.DataFrame(bucket_avg(df["Global_active_power"], bucket_size))
    df.dropna(inplace=True)
    x = np.array(range(df.shape[0]))
    y = np.array(df.Global_active_power)
    return x, y
Example 19
    def _process_document(self, abs_path):
        """处理单个文档 返回文档的id以及 这个文档中每个词出现的次数"""
        document = doc(abs_path)

        content = document.article + document.abstract
        # tokenize, remove stop words, then lemmatize
        content_tokens = preprocess(content)

        return document.id_, Counter(content_tokens)
Example 20
    def __init__(self, query_string, topk=10):
        """args:
            query_string:查询的字符串
            topk:返回的文档的个数

        """
        self.query_string = query_string
        self.query_tokens = preprocess(self.query_string)
        self.topk = topk
Example 21
    def pre_process(self, top_k, file_names, word2vec_file_name, is_train):
        p = preprocess()
        new_file_name = p.new_file_name(word2vec_file_name, top_k)
        # if training, create a new word embedding file of the top k words
        if is_train:
            print("creating new word2vec file of top {} features".format(top_k))
            top_k_words = p.top_k_freq_words(file_names, top_k)
            p.top_k_word2vec(word2vec_file_name, top_k_words, embedding_dimension, new_file_name)
        print("loading word2vec file from ", new_file_name)
        self.read_word_embedding(new_file_name)
Example 22
    def infer_vectors(self, posDic, posLda, negDic, negLda):
        """ infer the topic vectors. """
        pos = ""
        neg = ""
        for (ratings, review) in self.pos_reviews:
            pos = pos + review
        for (ratings, review) in self.neg_reviews:
            neg = neg + review

        pos_tuple = posLda[posDic.doc2bow(preprocess(pos))]
        neg_tuple = negLda[negDic.doc2bow(preprocess(neg))]

        pos_repr = [0] * posLda.num_topics
        neg_repr = [0] * negLda.num_topics
        for k, v in pos_tuple:
            pos_repr[k] = v
        for k, v in neg_tuple:
            neg_repr[k] = v
        self.lda_repr = pos_repr + neg_repr
        return self.lda_repr
Example 23
def show_results(file_name, items, colors, labels):
    img_show = util.preprocess(file_name, RGB2BGR=False)
    for r in items:
        # r: (class index, score, x, y, width, height)
        img_show = cv2.rectangle(img_show, (r[2], r[3]), (r[2] + r[4], r[3] + r[5]),
                                 color=colors[r[0]], thickness=1)
        font = cv2.FONT_HERSHEY_SIMPLEX
        img_show = cv2.putText(img_show, labels[r[0]], (r[2], r[3]), font, 0.6, colors[r[0]], 2)
        img_show = cv2.putText(img_show, str(r[1]), (r[2] + r[4], r[3] + r[5]), font, 0.3, colors[r[0]], 2)
    plt.imshow(img_show)
    plt.show()
    return img_show
Example 24
    def predict(cls, input_text):
        # get the model and vectorizer.
        clf, vectorizer = cls.get_model()

        # clean the text the same way we did during training.
        preprocessed_text = preprocess(input_text)

        # converted the cleaned text into vector
        vector = vectorizer.transform([preprocessed_text])

        # use the sklearn logistic regression predict function to make predictions
        return clf.predict(vector)[0], clf.predict_proba(vector)[0]
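Example 24 only works if preprocess cleans raw text at inference time exactly as it was cleaned during training; a plausible sketch of such a text-cleaning helper (an assumption, not the project's actual code):

import re
import string

def preprocess(text):
    # lower-case, drop punctuation, and collapse runs of whitespace
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return re.sub(r'\s+', ' ', text).strip()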
Example 25
    def perturb(self, x, y, sess):
        sess.run(self.new_vars_initializer)
        sess.run(self.xs.initializer)
        sess.run(self.do_clip_xs, {self.orig_xs: x})

        for i in range(self.num_steps):
            imgs = sess.run(self.xs)
            points = imgs.reshape((-1, 3))
            t = preprocess(imgs, self.codes)
            sess.run(self.train, feed_dict={self.ys: y, self.z: t})
            sess.run(self.do_clip_xs, {self.orig_xs: x})

        return sess.run(self.xs)
Example 26
def get_style_images(content_img):
  _, ch, cw, cd = content_img.shape
  style_imgs = []
  for style_fn in args.style_imgs:
    path = os.path.join(args.style_imgs_dir, style_fn)
    # bgr image
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    check_image(img, path)
    img = img.astype(np.float32)
    img = cv2.resize(img, dsize=(cw, ch), interpolation=cv2.INTER_AREA)
    img = preprocess(img)
    style_imgs.append(img)
  return style_imgs
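Example 26 (and Example 34 further down) feeds float32 BGR images into a VGG-based network, so preprocess presumably performs the usual mean subtraction and adds a batch dimension; a hedged sketch with assumed mean values:

import numpy as np

# approximate ImageNet channel means for BGR input (assumed values)
VGG_MEAN_BGR = np.array([103.939, 116.779, 123.68], dtype=np.float32)

def preprocess(img):
    # img: float32 BGR image with shape (H, W, 3)
    img = img - VGG_MEAN_BGR        # center channels around the dataset mean
    return img[np.newaxis, ...]     # add batch dimension -> (1, H, W, 3)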
Example 27
    def eval(self, args, test_data):
        x_test_batch, y_test_batch = util.preprocess(test_data)
        checkpoint = tf.train.get_checkpoint_state(args.model_folder)
        with tf.Session(graph=self.graph) as sess:
            print("Init models")
            self.saver.restore(sess, checkpoint.model_checkpoint_path)

            acc = sess.run(self.accuracy,
                           feed_dict={
                               self.x_plh: x_test_batch,
                               self.y_plh: y_test_batch
                           })
            print('Accuracy on test data: %f' % acc)
Example 28
def skills_section(lang):
    assert lang in ["zh", "en"]

    s = "\\section{{{}}}\n\n".format(_["skills"][lang])

    s += "\\vspace{0.618ex}\n"
    s += r"\begin{itemize}"
    s += "\n"

    for i in _["skills"]["details"][lang]:
        s += "\\item " + preprocess(i) + "\n"
    s += r"\end{itemize}"
    s += "\n"
    return s
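In Example 28 the skill strings end up inside LaTeX source, so preprocess most likely escapes LaTeX special characters; a minimal sketch under that assumption:

# characters that are special in LaTeX text mode (assumed escaping rules)
LATEX_SPECIALS = {
    '&': r'\&', '%': r'\%', '$': r'\$', '#': r'\#', '_': r'\_',
    '{': r'\{', '}': r'\}',
    '~': r'\textasciitilde{}', '^': r'\textasciicircum{}',
    '\\': r'\textbackslash{}',
}

def preprocess(text):
    # escape character by character to avoid double-escaping inserted backslashes
    return ''.join(LATEX_SPECIALS.get(ch, ch) for ch in text)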
Example 29
def get_rnn_data(N_rows, bucket_size):
    parse_dates = [['Date', 'Time']]
    filename = "household_power_consumption1.txt"
    df = preprocess(N_rows, parse_dates, filename)
    global_power = pd.DataFrame(
        bucket_avg(df["Global_active_power"], bucket_size))
    sub1 = pd.DataFrame(bucket_avg(df["Sub_metering_1"], bucket_size))
    sub2 = pd.DataFrame(bucket_avg(df["Sub_metering_2"], bucket_size))
    sub3 = pd.DataFrame(bucket_avg(df["Sub_metering_3"], bucket_size))

    #df.dropna(inplace=True)
    #df.iloc[-1, :].index  # last time step  #2010-11-26 21:00:00
    x = np.array(range(global_power.shape[0]))
    y = np.column_stack((sub1, sub2, sub3, global_power))
    return x, y
Example 30
def correct(sentence):
    sentence = preprocess(sentence)
    tokens = tokenize(sentence)
    print('segmented sentence is:', ''.join([str(token) for token in tokens]))
    seg_range = [[token[1], token[2]] for token in tokens]
    _, _, maybe_error_range = score_sentence(sentence)
    maybe_error_ranges = []
    if maybe_error_range:
        print('maybe error range:', maybe_error_range)
        maybe_error_ranges = merge_ranges(overlap_ranges(maybe_error_range, seg_range))
        for err_range in maybe_error_ranges:
            start_index, end_index = err_range
            print('maybe error words:', sentence[start_index:end_index])
            corrected_words = correct_chars(sentence, start_index, end_index)
            print('corrected words:', corrected_words)
            sentence = sentence[:start_index] + corrected_words + sentence[end_index:]
    return sentence, maybe_error_ranges
Example 31
def run_process(n_click, algo, metric, gen, filename, label):
    # receive gen, filename, label, budget
    if n_click > 0:
        # remove previous files
        if path.exists('pipeline.csv'):
            os.remove('pipeline.csv')
        if path.exists('ref.csv'):
            os.remove('ref.csv')
        p_ref = pipelineRef(algo)
        X, y = preprocess(filename, label, algo)
        total_time, stat = long_process(gen, X, y, metric, algo)
        stat['status'] = 'Completed'
        print("waiting for 3 seconds to finish processes")
        time.sleep(3)
        return ' ', ' : ' + str(total_time) + " Seconds", True, True, True
    else:
        return ' ', ' ', False, False, False
Example 32
def plot_lineages(filename,
                  start_selector,
                  end_selector,
                  start_index=None,
                  end_index=None,
                  **kwargs):
    records = read_records(filename)
    lineages = preprocess(records, cmd_of_interest='', send_only=False)
    intervals = get_intervals(lineages,
                              start_selector,
                              end_selector,
                              start_index=start_index,
                              end_index=end_index,
                              **kwargs)
    items = sorted(intervals.items(), key=lambda i: int(i[0]))
    return plt.plot([i[0] for i in items], [i[1] for i in items], **kwargs)
Example 33
def has_stabilized(frames):
    if len(frames) < 1:
        return

    preprocessed = [util.preprocess(x) for x in frames]
    sum_diff = 0

    # sum the absolute difference of each pair of consecutive frames
    for i in range(len(preprocessed) - 1):
        diff = cv2.absdiff(preprocessed[i], preprocessed[i + 1])
        sum_diff += np.sum(diff)

    # normalize diff by number of pixels and frames
    movement = sum_diff / len(preprocessed) / preprocessed[0].size
    return movement < MOVEMENT_THRESHOLD
Example 34
def get_content_image(content_img):
  path = os.path.join(args.content_img_dir, content_img)
  # bgr image
  img = cv2.imread(path, cv2.IMREAD_COLOR)
  check_image(img, path)
  img = img.astype(np.float32)
  h, w, d = img.shape
  mx = args.max_size
  # resize if > max size
  if h > w and h > mx:
    w = (float(mx) / float(h)) * w
    img = cv2.resize(img, dsize=(int(w), mx), interpolation=cv2.INTER_AREA)
  if w > mx:
    h = (float(mx) / float(w)) * h
    img = cv2.resize(img, dsize=(mx, int(h)), interpolation=cv2.INTER_AREA)
  img = preprocess(img)
  return img
Example 35
def get_canny(img):
    preprocessed = util.preprocess(img)
    canny = cv2.Canny(preprocessed, threshold1=200, threshold2=50)
    dilated = cv2.dilate(canny, (10, 10))
    return dilated
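Example 35 (and Example 37 below) passes the preprocessed image straight to cv2.Canny and cv2.threshold, which both expect a single-channel image, so util.preprocess is presumably a grayscale-plus-smoothing step; a sketch under that assumption:

import cv2

def preprocess(img):
    # convert to grayscale and smooth to suppress noise before edge/threshold operations
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.GaussianBlur(gray, (5, 5), 0)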
Example 36
X_test = []
print "#documents: {}".format(len(glob(corpus_dir + '*.txt')))

for p in glob(corpus_dir + '*.txt'):
    doc_id = os.path.basename(p).split('.')[0]
    if doc_id not in train_doc_ids:
        X_test.append(open(p).read())

X_train = train_docs
label = LabelEncoder().fit(train_labels)
Y_train = label.transform(train_labels)


# preprocessing
X_train = [preprocess(doc) for doc in X_train]
X_test = [preprocess(doc) for doc in X_test]


print "#X_train: {}, #X_test: {}".format(len(X_train), len(X_test))
clf.fit(X_train, Y_train)
Y_test = clf.predict_proba(X_test)

print()

for l, i in zip(label.classes_, range(Y_test.shape[1])):
    proba = Y_test[:, i]
    print("{}: {}".format(l, len(np.nonzero(proba >= 0.5)[0])))


weights = clf.get_params()['clf'].coef_[0,:]
Example 37
def get_binary(img, thresh=150):
    preprocessed = util.preprocess(img)
    _, threshold = cv2.threshold(preprocessed, thresh=thresh, maxval=255, type=cv2.THRESH_BINARY)
    return threshold