Example #1
def main(args):
    gpu = args.gpu
    path_config = args.config
    mode = args.mode
    path_word2vec = args.word2vec
    curriculum = False if args.curriculum == 0 else True
    
    # Hyper parameters (const)
    MAX_EPOCH = 10000000000
    MAX_PATIENCE = 20
    EVAL = 10000
    if curriculum:
        LENGTH_LIMITS = [10, 20, 30, 40, 50] # NOTE: experimental
    else:
        LENGTH_LIMITS = [50]

    config = utils.Config(path_config)
    
    # Preparation
    path_corpus_train = config.getpath("prep_corpus") + ".train"
    path_corpus_val = config.getpath("prep_corpus") + ".val"
    basename = "won.%s.%s" % (
                    os.path.basename(path_corpus_train),
                    os.path.splitext(os.path.basename(path_config))[0])
    path_snapshot = os.path.join(config.getpath("snapshot"), basename + ".model")
    path_snapshot_vectors = os.path.join(config.getpath("snapshot"), basename + ".vectors.txt")
    if mode == "train":
        path_log = os.path.join(config.getpath("log"), basename + ".log")
        utils.set_logger(path_log)
    elif mode == "evaluation":
        path_evaluation = os.path.join(config.getpath("evaluation"), basename + ".txt")
        utils.set_logger(path_evaluation)
    elif mode == "analysis":
        path_analysis = os.path.join(config.getpath("analysis"), basename)

    utils.logger.debug("[info] TRAINING CORPUS: %s" % path_corpus_train)
    utils.logger.debug("[info] VALIDATION CORPUS: %s" % path_corpus_val)
    utils.logger.debug("[info] CONFIG: %s" % path_config)
    utils.logger.debug("[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec)
    utils.logger.debug("[info] SNAPSHOT (MODEL): %s " % path_snapshot)
    utils.logger.debug("[info] SNAPSHOT (WORD EMBEDDINGS): %s " % path_snapshot_vectors)
    if mode == "train":
        utils.logger.debug("[info] LOG: %s" % path_log)
    elif mode == "evaluation":
        utils.logger.debug("[info] EVALUATION: %s" % path_evaluation)
    elif mode == "analysis":
        utils.logger.debug("[info] ANALYSIS: %s" % path_analysis)

    # Hyper parameters
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    aggregation = config.getstr("aggregation")
    attention = config.getstr("attention")
    retrofitting = config.getbool("retrofitting")
    alpha = config.getfloat("alpha")
    scale = config.getfloat("scale")
    identity_penalty = config.getbool("identity_penalty")
    lmd = config.getfloat("lambda")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    utils.logger.debug("[info] WORD DIM: %d" % word_dim)
    utils.logger.debug("[info] STATE DIM: %d" % state_dim)
    utils.logger.debug("[info] AGGREGATION METHOD: %s" % aggregation)
    utils.logger.debug("[info] ATTENTION METHOD: %s" % attention)
    utils.logger.debug("[info] RETROFITTING: %s" % retrofitting)
    utils.logger.debug("[info] ALPHA = %f" % alpha) 
    utils.logger.debug("[info] SCALE: %f" % scale)
    utils.logger.debug("[info] IDENTITY PENALTY: %s" % identity_penalty)
    utils.logger.debug("[info] LAMBDA: %f" % lmd)
    utils.logger.debug("[info] GRADIENT CLIPPING: %f" % grad_clip)
    utils.logger.debug("[info] WEIGHT DECAY: %f" % weight_decay)
    utils.logger.debug("[info] BATCH SIZE: %d" % batch_size)

    if retrofitting:
        assert path_word2vec is not None

    # Data preparation
    corpus_train_list = [
        load_corpus(
                path_corpus_train,
                vocab=path_corpus_train + ".vocab",
                max_length=length_limit)
        for length_limit in LENGTH_LIMITS]
    corpus_val = load_corpus(
                path_corpus_val,
                vocab=corpus_train_list[0].vocab,
                max_length=LENGTH_LIMITS[-1])

    # Model preparation 
    if (mode == "train") and (path_word2vec is not None):
        initialW_data = utils.load_word2vec_weight_matrix(
                                    path_word2vec,
                                    word_dim,
                                    corpus_train_list[0].vocab,
                                    scale)
    else:
        initialW_data = None
    cuda.get_device(gpu).use()
    model = models.WON(
                vocab_size=len(corpus_train_list[0].vocab),
                word_dim=word_dim,
                state_dim=state_dim,
                aggregation=aggregation,
                attention=attention,
                initialW=initialW_data,
                EOS_ID=corpus_train_list[0].vocab["<EOS>"])
    if mode != "train":
        serializers.load_npz(path_snapshot, model)
    model.to_gpu(gpu)
    
    # Training/Evaluation/Analysis
    if mode == "train":
        length_index = 0
        utils.logger.debug("[info] Evaluating on the validation set ...")
        loss, acc = evaluate(model, corpus_val,
                                lmd, identity_penalty)
        utils.logger.debug("[validation] iter=0, epoch=0, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                (LENGTH_LIMITS[length_index], loss, acc*100))
        for _ in np.random.randint(0, len(corpus_val), 10):
            s = corpus_val.random_sample()
            batch_sents = [s]
            batch_labels = make_labels(batch_sents)
            _, order_pred = model.forward(batch_sents, train=False)
            order_pred = [a[0] for a in order_pred]
            order_gold = batch_labels[0]
            s = [corpus_val.ivocab[w] for w in s]
            s_pred = utils.reorder(s, order_pred)
            s_gold = utils.reorder(s, order_gold)
            s_pred = " ".join(s_pred).encode("utf-8")
            s_gold = " ".join(s_gold).encode("utf-8")
            utils.logger.debug("[check] <Gold> %s" % s_gold)
            utils.logger.debug("[check] <Pred> %s" % s_pred)
            utils.logger.debug("[check] <Gold:order> %s" % order_gold)
            utils.logger.debug("[check] <Pred:order> %s" % order_pred)
        # training & validation
        opt = optimizers.SMORMS3()
        opt.setup(model)
        opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
        opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        # best_acc = -1.0
        best_acc = acc
        patience = 0
        it = 0
        n_train = len(corpus_train_list[0]) # TODO
        finish_training = False
        for epoch in xrange(1, MAX_EPOCH+1): 
            if finish_training:
                break
            for data_i in xrange(0, n_train, batch_size):
                if data_i + batch_size > n_train:
                    break
                # data preparation
                batch_sents = corpus_train_list[length_index].next_batch(size=batch_size)
                batch_labels = make_labels(batch_sents)
                # forward
                loss, acc = forward(model, batch_sents, batch_labels,
                                    lmd, identity_penalty,
                                    train=True)
                # TODO: BEGIN
                if retrofitting:
                    part_indices_data = np.asarray(list(
                        set([w for s_ in batch_sents for w in s_])
                        ))
                    part_initialW_data = initialW_data[part_indices_data]
                
                    part_indices = Variable(cuda.cupy.asarray(part_indices_data, dtype=np.int32),
                                            volatile=False)
                    part_initialW = Variable(cuda.cupy.asarray(part_initialW_data, dtype=np.float32),
                                            volatile=False)
                    loss_ret = frobenius_squared_error(model.embed(part_indices), part_initialW)
                else:
                    loss_ret = 0.0
                loss = loss + alpha * loss_ret
                # TODO: END
                # backward & update
                model.zerograds()
                loss.backward()
                loss.unchain_backward()
                opt.update()
                it += 1
                # log
                loss = float(cuda.to_cpu(loss.data))
                acc = float(cuda.to_cpu(acc.data))
                utils.logger.debug("[training] iter=%d, epoch=%d (%d/%d=%.03f%%), max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                    (it, epoch, 
                                    data_i+batch_size,
                                    n_train,
                                    float(data_i+batch_size)/n_train * 100,
                                    LENGTH_LIMITS[length_index],
                                    loss,
                                    acc*100))
                if it % EVAL == 0: 
                    # validation
                    utils.logger.debug("[info] Evaluating on the validation set ...")
                    loss, acc = evaluate(model, corpus_val,
                                            lmd, identity_penalty)
                    utils.logger.debug("[validation] iter=%d, epoch=%d, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                            (it, epoch, LENGTH_LIMITS[length_index], loss, acc*100))
                    for _ in np.random.randint(0, len(corpus_val), 10):
                        s = corpus_val.random_sample()
                        batch_sents = [s]
                        batch_labels = make_labels(batch_sents)
                        _, order_pred = model.forward(batch_sents, train=False)
                        order_pred = [a[0] for a in order_pred]
                        order_gold = batch_labels[0]
                        s = [corpus_val.ivocab[w] for w in s]
                        s_pred = utils.reorder(s, order_pred)
                        s_gold = utils.reorder(s, order_gold)
                        s_pred = " ".join(s_pred).encode("utf-8")
                        s_gold = " ".join(s_gold).encode("utf-8")
                        utils.logger.debug("[check] <Gold> %s" % s_gold)
                        utils.logger.debug("[check] <Pred> %s" % s_pred)
                        utils.logger.debug("[check] <Gold:order> %s" % order_gold)
                        utils.logger.debug("[check] <Pred:order> %s" % order_pred)

                    if best_acc < acc:
                        # save
                        utils.logger.debug("[info] Best accuracy is updated: %.2f%% => %.2f%%" % (best_acc*100.0, acc*100.0))
                        best_acc = acc
                        patience = 0
                        serializers.save_npz(path_snapshot, model)
                        serializers.save_npz(path_snapshot + ".opt", opt)
                        save_word2vec(path_snapshot_vectors, extract_word2vec(model, corpus_train_list[length_index].vocab))
                        utils.logger.debug("[info] Saved.")
                    else:
                        patience += 1
                        utils.logger.debug("[info] Patience: %d (best accuracy: %.2f%%)" % (patience, best_acc*100.0))
                        if patience >= MAX_PATIENCE:
                            if curriculum and (length_index != len(LENGTH_LIMITS)-1):
                                length_index += 1
                                break
                            else:
                                utils.logger.debug("[info] Patience %d is over. Training finished." \
                                        % patience)
                                finish_training = True
                                break
    elif mode == "evaluation":
        pass
    elif mode == "analysis":
        utils.mkdir(path_analysis)
        f = open(os.path.join(path_analysis, "dump.txt"), "w")
        data_i = 0
        for s in pyprind.prog_bar(corpus_val):
            # NOTE: in analysis mode, decode every sentence regardless of its length
            batch_sents = [s]
            batch_labels = make_labels(batch_sents)
            _, order_pred = model.forward(batch_sents, train=False)
            order_pred = [a[0] for a in order_pred]
            order_gold = batch_labels[0]
            s = [corpus_val.ivocab[w] for w in s]
            s_pred = utils.reorder(s, order_pred)
            s_gold = utils.reorder(s, order_gold)
            s_pred = " ".join(s_pred).encode("utf-8")
            s_gold = " ".join(s_gold).encode("utf-8")
            f.write("[%d] <Gold> %s\n" % (data_i+1, s_gold))
            f.write("[%d] <Pred> %s\n" % (data_i+1, s_pred))
            f.write("[%d] <Gold:order> %s\n" % (data_i+1, order_gold))
            f.write("[%d] <Pred:order> %s\n" % (data_i+1, order_pred))
            data_i += 1
        f.flush()
        f.close()

    utils.logger.debug("[info] Done.")
Example #2
    contours, hierarchy = cv2.findContours(
        imgErode, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)  # FINDING CONTOURS
    contours = sorted(
        contours, key=cv2.contourArea,
        reverse=True)  # SORTING CONTOURS TO GET THE BIGGEST CONTOUR

    for c in contours:
        p = cv2.arcLength(c, True)  # FINDING ARC LENGTH
        approx = cv2.approxPolyDP(c, 0.02 * p, True)

        if len(approx) == 4:  # LENGTH IS 4 FOR RECTANGLE
            target = approx
            break

    approx = utils.reorder(target)  # REORDERING THE POINTS
    #cv2.drawContours(img,target,-1,(0,255,0),20) # TO DISPLAY CONTOURS
    print(approx)  # DISPLAYS THE POINTS
    pts1 = np.float32(approx)
    pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg],
                       [widthImg, heightImg]])

    matrix = cv2.getPerspectiveTransform(pts1, pts2)
    imgWarpColored = cv2.warpPerspective(
        img, matrix, (widthImg, heightImg))  # WARPS THE IMAGE

    cv2.imshow('Original', img)
    cv2.imshow('Erode', imgErode)
    cv2.imshow('WarpCol', imgWarpColored)

    if cv2.waitKey(1) & 0xFF == ord('q'):
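
This and several of the OpenCV examples below call utils.reorder on the four corner points of a detected quadrilateral before cv2.getPerspectiveTransform, because the destination points are listed as top-left, top-right, bottom-left, bottom-right. The helper itself is not included in the snippets; a sketch of the corner-sorting logic they appear to rely on (assuming a (4, 1, 2) contour array as input), not necessarily the exact implementation:

import numpy as np

def reorder(points):
    # Sort 4 corner points into top-left, top-right, bottom-left, bottom-right
    # so they match pts2 = [[0, 0], [w, 0], [0, h], [w, h]].
    points = points.reshape((4, 2))
    ordered = np.zeros((4, 1, 2), dtype=np.int32)
    s = points.sum(axis=1)             # x + y
    d = np.diff(points, axis=1)        # y - x
    ordered[0] = points[np.argmin(s)]  # top-left: smallest x + y
    ordered[3] = points[np.argmax(s)]  # bottom-right: largest x + y
    ordered[1] = points[np.argmin(d)]  # top-right: smallest y - x
    ordered[2] = points[np.argmax(d)]  # bottom-left: largest y - x
    return ordered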
Example #3
contours, hierarchy = cv2.findContours(imgCanny,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
cv2.drawContours(imgContours,contours,-1,(255,0,255),7)


# =================Find Rectangles===================
rectCont = utils.rectContour(contours)

biggestContour = utils.getCornerPoints(rectCont[0]) # First Biggest area
gradePoints = utils.getCornerPoints(rectCont[1])    # second biggest for grading

if len(biggestContour) != 0 and len(gradePoints) != 0:
    cv2.drawContours(imgBiggestContours,biggestContour,-1,(0,0,255),15)
    cv2.drawContours(imgBiggestContours,gradePoints,-1,(255,0,0),15)

    # Reorder points for our bird view
    biggestContour = utils.reorder(biggestContour)
    gradePoints = utils.reorder(gradePoints)
    

    # ====================== Bird View for Biggest Rectangle (OMR)============================
    pt1 = np.float32(biggestContour)
    pt2 = np.float32([[0, 0],[widthImg, 0], [0, heightImg],[widthImg, heightImg]])
    matrix = cv2.getPerspectiveTransform(pt1,pt2)
    imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

    # ============= Bird View for Second Biggest Rectangle (Grading) ============================
    ptsG1 = np.float32(gradePoints)  
    ptsG2 = np.float32([[0, 0], [325, 0], [0, 150], [325, 150]])  
    matrixG = cv2.getPerspectiveTransform(ptsG1, ptsG2)
    imgGradeDisplay = cv2.warpPerspective(img, matrixG, (325, 150)) 
    #cv2.imshow("grade",imgGradeDisplay)
Example #4
    else: img = cv2.imread(path)

    imgContours, conts = utils.getContours(img, minArea=50000, filter=4)
    if len(conts) != 0:
        biggest = conts[0][2]
        #print(biggest)
        imgWarp = utils.warpImg(img, biggest, wP, hP)
        imgContours2, conts2 = utils.getContours(imgWarp,
                                                 minArea=2000,
                                                 filter=4,
                                                 cThr=[50, 50],
                                                 draw=False)
        if len(conts2) != 0:
            for obj in conts2:
                cv2.polylines(imgContours2, [obj[2]], True, (0, 255, 0), 2)
                nPoints = utils.reorder(obj[2])
                nW = round((utils.findDis(nPoints[0][0] // scale,
                                          nPoints[1][0] // scale) / 10), 1)
                nH = round((utils.findDis(nPoints[0][0] // scale,
                                          nPoints[2][0] // scale) / 10), 1)
                cv2.arrowedLine(imgContours2,
                                (nPoints[0][0][0], nPoints[0][0][1]),
                                (nPoints[1][0][0], nPoints[1][0][1]),
                                (255, 0, 255), 3, 8, 0, 0.05)
                cv2.arrowedLine(imgContours2,
                                (nPoints[0][0][0], nPoints[0][0][1]),
                                (nPoints[2][0][0], nPoints[2][0][1]),
                                (255, 0, 255), 3, 8, 0, 0.05)
                x, y, w, h = obj[3]
                cv2.putText(imgContours2, '{}cm'.format(nW), (x + 30, y - 10),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (255, 0, 255),
Example #5
def test_reorder():
    seq = ['b', 'c', 'a']
    assert utils.reorder(seq, [2, 0, 1]) == ['a', 'b', 'c']
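
This test pins down the list-reordering variant of utils.reorder (also used in Example #1): the i-th element of the result is seq[order[i]]. A minimal implementation consistent with the test (the actual utils.reorder is not shown in these snippets):

def reorder(seq, order):
    # result[i] == seq[order[i]]
    return [seq[i] for i in order]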
Example #6
    t1, t2 = utils.get_val()
    # print(t1,t2)

    #Load Image
    img = cv2.imread(path)
    # print(img.shape)

    #Perform edge detection,dilation and erosion
    pp_img, orig_copy = utils.preprocess(img, t1, t2)
    imgContours = orig_copy.copy()  #Copying image for display purposes

    #Finding contours from the binary image
    contours = utils.get_contours(pp_img)
    biggest_contours = utils.getBigCntr(contours)
    cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 2)
    reordered_points = utils.reorder(biggest_contours)
    # print("reordered points : ", len(reordered_points), type(reordered_points))

    #Apply perspective transform on the resized image
    final = utils.get_perspective(reordered_points, orig_copy)

    #display the images
    cv2.imshow("Image", orig_copy)
    cv2.imshow("Preprocessed", pp_img)
    cv2.imshow("contours", imgContours)
    cv2.imshow("Final", final)

    if cv2.waitKey(1) == 27:
        cv2.imwrite("output/resized.jpeg", orig_copy)
        cv2.imwrite("output/preprocessed.jpeg", pp_img)
        cv2.imwrite("output/contours.jpeg", imgContours)
Example #7
    imgThreshold = cv2.Canny(imgBlur, thres[0], thres[1])  # APPLY CANNY EDGE DETECTION
    kernel = np.ones((5, 5))
    imgDial = cv2.dilate(imgThreshold, kernel, iterations=2)  # APPLY DILATION
    imgThreshold = cv2.erode(imgDial, kernel, iterations=1)  # APPLY EROSION

    ## FIND ALL CONTOURS
    imgContours = img.copy()  # COPY IMAGE FOR DISPLAY PURPOSES
    imgBigContour = img.copy()  # COPY IMAGE FOR DISPLAY PURPOSES
    contours, hierarchy = cv2.findContours(imgThreshold, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)  # FIND ALL CONTOURS
    cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 10)  # DRAW ALL DETECTED CONTOURS

    # FIND THE BIGGEST CONTOUR
    biggest, maxArea = utils.biggestContour(contours)  # FIND THE BIGGEST CONTOUR
    if biggest.size != 0:
        biggest = utils.reorder(biggest)
        cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20)  # DRAW THE BIGGEST CONTOUR
        imgBigContour = utils.drawRectangle(imgBigContour, biggest, 2)
        pts1 = np.float32(biggest)  # PREPARE POINTS FOR WARP
        pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]])  # PREPARE POINTS FOR WARP
        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

        # REMOVE 20 PIXELS FROM EACH SIDE
        imgWarpColored = imgWarpColored[20:imgWarpColored.shape[0] - 20, 20:imgWarpColored.shape[1] - 20]
        imgWarpColored = cv2.resize(imgWarpColored, (widthImg, heightImg))

        # APPLY ADAPTIVE THRESHOLD
        imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
        imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2)
        imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
Example #8
    def update_graph(self, element_list, rank):
        """Add ranking information to the graph"""
        ordered_elements = utils.reorder(element_list, rank)
        for i in range(len(ordered_elements) - 1):
            self.add_edge(superior_element=ordered_elements[i],
                          inferior_element=ordered_elements[i + 1])
Example #9
    def ordering(self):
        '''Change the order of all children in a parent's collection.'''

        program_id = self.get_argument('programId', '')
        survey_id = self.get_argument('surveyId', '')
        parent_id = self.get_argument('parentId', '')
        root = self.get_argument('root', None)

        if not parent_id and root is None:
            raise errors.ModelError(
                "Parent ID required, or specify 'root=' for root nodes")
        if root is not None and parent_id:
            raise errors.ModelError(
                "Can't specify both 'root=' and parent ID")
        if root is not None and not survey_id:
            raise errors.ModelError(
                "Survey ID is required for operating on root nodes")

        son = json_decode(self.request.body)

        with model.session_scope() as session:
            user_session = self.get_user_session(session)

            act = Activities(session)
            if parent_id:
                parent = (
                    session.query(model.QuestionNode)
                    .get((parent_id, program_id)))
                if not parent:
                    raise errors.MissingDocError(
                        "Parent question node does not exist")
                survey = parent.survey
                if survey_id and survey_id != str(survey.id):
                    raise errors.MissingDocError(
                        "Parent does not belong to that survey")
                log.debug("Reordering children of: %s", parent)
                reorder(parent.children, son)
                act.record(user_session.user, parent, ['reorder_children'])
                act.ensure_subscription(
                    user_session.user, parent, parent.program, self.reason)

            elif root is not None:
                survey = (
                    session.query(model.Survey)
                    .get((survey_id, program_id)))
                if not survey:
                    raise errors.MissingDocError("No such survey")
                log.debug("Reordering children of: %s", survey)
                reorder(survey.qnodes, son)
                act.record(
                    user_session.user, survey, ['reorder_children'])
                act.ensure_subscription(
                    user_session.user, survey, survey.program, self.reason)

            else:
                raise errors.ModelError(
                    "Survey or parent ID required")

            policy = user_session.policy.derive({
                'program': survey.program,
                'survey': survey,
                'surveygroups': survey.program.surveygroups,
            })
            policy.verify('surveygroup_interact')
            policy.verify('qnode_edit')

        self.query()
Example #10
    rna_id_temp = subset_sample_ids_rna.tolist()

    meth_id_temp = subset_sample_ids_meth.tolist()

    # NOTE: the explanation below may not be exact, but the code works as intended
    # after for-loop, ordered_index will contain the indexes that methylation should change to in order to match rna
    # ex. ordered_index = [5,2,6,1,3,4], then in order for methylation to be in the same order as rna, we need to create
    # a new list that looks like: [meth[5],meth[2],meth[6],meth[1],meth[3],meth[4]]

    for rna_id in rna_id_temp:

        idx = meth_id_temp.index(rna_id)

        ordered_index.append(idx)

    reorder(subset_sample_ids_rna, ordered_index)

    reorder(data_rna, ordered_index)

    reorder(T_rna, ordered_index)

    reorder(subset_tissue_rna, ordered_index)

    # create joint data

    data = np.concatenate((data_rna, data_meth), 1)

    T = T_rna

    model = DPGMM(data=data,
                  tissue_assignments=T,
Example #11
def doFitsClassic(ws, mhypVar, recoMassVar, cat, proc, allMasses, massScaleNuisance, resolutionNuisance):
    # classic fitting of signal MC

    # fitted values for this category and signal process
    # first index is the Gaussian component number
    # second index is the mass point index
    sigmaValues = []
    dmuValues = []
    fracValues = []
    normValues = []

    for mass in allMasses:

        # get the signal MC dataset
        # e.g. sig_Hem_unbinned_ggh_115_cat7
        dataset = utils.getObj(ws, "sig_Hem_unbinned_%s_%d_%s" % (proc, mass, cat))

        # get the signal pdf
        # e.g. sigpdf_vbf_115_cat8
        pdf = utils.getObj(ws, "sigpdf_%s_%d_%s" % (proc, mass, cat))

        #----------
        # adjust fit parameters if specified
        #----------

        sigmaVars = getGaussianVars(ws, "sigma", proc, mass, cat)
        dmuVars   = getGaussianVars(ws, "dmu",   proc, mass, cat)
        fracVars  = getGaussianVars(ws, "frac",  proc, mass, cat)

        numGaussians = len(sigmaVars)

        assert numGaussians == len(dmuVars)
        assert numGaussians == len(fracVars) + 1

        for varname, vars in (("sigma", sigmaVars),
                              ("dmu",   dmuVars),
                              ):
            for gaussianIndex in range(len(vars)):

                # set the variable range and initial value of this variable
                setVariableRange(fitparams,
                                 varname + "%d" % gaussianIndex,
                                 vars[gaussianIndex],
                                 proc,
                                 mass,
                                 cat)
            # end of loop over Gaussian components
        # end of loop over variables

        #----------
        # perform the fit
        #----------

        pdf.fitTo(dataset,
                  ROOT.RooFit.Minimizer("Minuit2"),
                  ROOT.RooFit.Range(mass + getFitParam(fitparams, "fitRangeLeft",  proc, mass, cat, - 5),
                                    mass + getFitParam(fitparams, "fitRangeRight", proc, mass, cat, +5)),

                  # take MC statistics error, not error on number of events...
                  ROOT.RooFit.SumW2Error(False),

                  )

        #----------
        # normalization object
        #----------

        sumWeights = dataset.sumEntries()
        normVar = ROOT.RooRealVar(pdf.GetName() + "_norm",
                                  pdf.GetName() + "_norm",
                                  sumWeights,
                                  0,
                                  sumWeights); gcs.append(normVar)
        normVar.setConstant(True)

        getattr(ws, 'import')(normVar)

        normValues.append(sumWeights)

        #----------
        # sort the Gaussian components according to the width
        #----------

        indices = sorted(range(numGaussians), key = lambda index: sigmaVars[index].getVal() )

        # instead of reordering the objects, we re-assign the values
        utils.reassignValues(indices, sigmaVars)
        utils.reassignValues(indices, dmuVars)

        # note that for the fractions (which are continued fractions),
        # we must expand them, sort and then collapse again
        # (the values will be different !)

        expandedFracValues = utils.expandContinuedFraction([ x.getVal() for x in fracVars])
        expandedFracValues = utils.reorder(indices, expandedFracValues)
        unexpandedFracValues = utils.collapseContinuedFraction(expandedFracValues)
        for value, var in zip(unexpandedFracValues, fracVars):
            var.setVal(value)

        #----------
        # fix the fitted parameters and read the fitted values
        #----------

        for vars, values in ((sigmaVars, sigmaValues),
                             (dmuVars, dmuValues),
                             (fracVars, fracValues),
                             ):

            if len(values) == 0:
                values.extend([[ ] for i in range(len(vars)) ] )

            # freeze the fitted variables at the fit final values
            # and add the values to a list for interpolation
            for gaussIndex, var in enumerate(vars):
                var.setConstant(True)
                values[gaussIndex].append(var.getVal())

        #----------


    # end of loop over masses

    #----------
    # produce the interpolating objects
    #----------
    interpDmuFuncs = []
    interpSigmaFuncs = []
    interpFracFuncs = []

    for varname, values, interpFuncs in (("sigma", sigmaValues, interpSigmaFuncs),
                                         ("dmu", dmuValues, interpDmuFuncs),
                                         ("frac", fracValues, interpFracFuncs)):

        for gaussIndex in range(len(values)):
            funcname = utils.makeGaussianVarname(varname + "func",
                                      proc,
                                      None, # mhyp
                                      cat,
                                      gaussIndex
                                      )

            func = utils.makePiecewiseLinearFunction(funcname,
                                                     mhypVar,
                                                     allMasses,
                                                     values[gaussIndex])

            # import this function into the workspace
            getattr(ws, 'import')(func, ROOT.RooFit.RecycleConflictNodes())

            interpFuncs.append(func)

        # end of loop over Gaussian components

    # end of loop over variables

    #----------
    # build the interpolated signal PDF
    #----------

    # example name: sigpdf_vbf_cat6

    suffix = "_".join([
        proc,
        # str(mhyp), # not used here
        cat,
        ])

    pdfname = "sigpdf_" + suffix
    pdf = utils.makeSumOfGaussians(pdfname,
                                   recoMassVar,       # reconstructed mass
                                   mhypVar,       # Higgs mass hypothesis
                                   interpDmuFuncs,
                                   interpSigmaFuncs,
                                   interpFracFuncs,
                                   massScaleNuisance = massScaleNuisance,
                                   resolutionNuisance = resolutionNuisance,
                                   ); gcs.append(pdf)

    # import this function into the workspace
    getattr(ws, 'import')(pdf, ROOT.RooFit.RecycleConflictNodes())

    #----------
    # build the interpolated normalization function
    #----------
    normfunc = utils.makePiecewiseLinearFunction(pdfname + "_norm",
                                                 mhypVar,
                                                 allMasses,
                                                 normValues); gcs.append(normfunc)

    # import this function into the workspace
    getattr(ws, 'import')(normfunc, ROOT.RooFit.RecycleConflictNodes())
Example #12
    descs[i] = nx.descendants(H, ni)
    
for j in range(num_outputs):
    nj = nodes[outputs][j]
    ancs[j] = nx.ancestors(H, nj)


input_similarity = np.zeros((num_inputs, num_inputs))

for i in range(num_inputs):
    for j in range(num_inputs):
        seti = descs[i]
        setj = descs[j]
        input_similarity[i, j] = 2*len(seti & setj)/(len(seti) + len(setj))

indinp = reorder(input_similarity)

input_similarity = input_similarity[indinp, :][:, indinp]

output_similarity = np.zeros((num_outputs, num_outputs))

for i in range(num_outputs):
    for j in range(num_outputs):
        seti = ancs[i]
        setj = ancs[j]
        output_similarity[i, j] = 2*len(seti & setj)/(len(seti) + len(setj))

indout = reorder(output_similarity)

output_similarity = output_similarity[indout, :][:, indout]
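
Here reorder receives a square similarity matrix and returns an index permutation that groups similar inputs/outputs together; the matrix is then re-indexed with that permutation. The implementation is not part of the snippet; one plausible sketch, assuming a hierarchical-clustering leaf order is wanted (an assumption, not the snippet's actual code):

import numpy as np
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import squareform

def reorder(similarity):
    # Turn similarity into a distance matrix, cluster hierarchically,
    # and return the dendrogram leaf order as an index permutation.
    dist = 1.0 - similarity
    np.fill_diagonal(dist, 0.0)
    condensed = squareform(dist, checks=False)
    return leaves_list(linkage(condensed, method="average"))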
Example #13
def doc_scan_pipeline(input=PATH, output="./img/scanned_doc.jpg"):
    img = cv2.imread(input)

    # 0. Convert given image from BGR to RGB format
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    h, w, _ = img.shape
    img = cv2.resize(img, (width, height))

    # 1. Convert to grayscale
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # 2. Add Gaussian blur
    img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)

    # 3. Add Canny edge detection
    img_threshold = cv2.Canny(img_blur, 100, 200, L2gradient=True)

    # 3.1 Apply dilation
    kernel = np.ones((3, 3))
    img_threshold = cv2.dilate(img_threshold, kernel, iterations=2)

    # 4. Find all the contours
    img_contours = img.copy()
    img_big_contour = img.copy()
    contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(image=img_contours,
                     contours=contours,
                     contourIdx=-1,
                     color=(0, 255, 0),
                     thickness=5)

    # 5. Find the biggest contour
    biggest, maxArea = biggest_contour(contours)
    biggest = reorder(biggest)
    cv2.drawContours(image=img_big_contour,
                     contours=biggest,
                     contourIdx=-1,
                     color=(0, 255, 0),
                     thickness=10)

    # 5.1 Draw a rectangle, i.e., 4 lines connecting the 4 dots corresponding to the largest contour
    img_big_contour = draw_rectangle(img_big_contour, biggest, thickness=2)

    pts1 = np.float32(biggest)
    pts2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])

    # 6. Image Warp
    # 6.1 Calculate a 3x3 perspective transform matrix
    matrix = cv2.getPerspectiveTransform(pts1, pts2)

    # 6.2 Apply the perspective matrix to the image
    img_warp_coloured = cv2.warpPerspective(img, matrix, (width, height))

    # 7. Adaptive thresholding
    img_warp_gray = cv2.cvtColor(img_warp_coloured, cv2.COLOR_BGR2GRAY)
    img_adaptive_th = cv2.adaptiveThreshold(img_warp_gray, 255, 1,
                                            cv2.THRESH_BINARY, 5, 2)

    # 7.1 Apply median blurring to remove tiny speckles of noise
    img_adaptive_th = cv2.medianBlur(img_adaptive_th, 3)

    # Save the document to disk
    cv2.imwrite(output, img_adaptive_th)

    # Add labels to each image
    img = draw_text(img, "Original")
    img_gray = draw_text(img_gray, "Grayscale")
    img_blur = draw_text(img_blur, "Gaussian Blur", pos=(int(width / 4), 50))
    img_threshold = draw_text(img_threshold,
                              "Canny Edge",
                              pos=(int(width / 4), 50))
    img_contours = draw_text(img_contours, "Contours")
    img_big_contour = draw_text(img_big_contour,
                                "Largest Contour",
                                pos=(int(width / 7), 50))
    img_warp_coloured = draw_text(img_warp_coloured,
                                  "Warp",
                                  pos=(int(width / 3), 50))
    img_adaptive_th = draw_text(img_adaptive_th,
                                "Adaptive Thresholding",
                                pos=(int(width / 7), 50),
                                font_scale=2,
                                font_thickness=6)

    blank_img = np.zeros((height, width, 3), dtype=np.uint8)
    image_list = [
        img, img_gray, img_blur, img_threshold, img_contours, img_big_contour,
        img_warp_coloured, img_adaptive_th
    ]

    # Combine the images into a grid
    # image_grid returns PIL image, np.asarray() can be used to convert it back to cv2 compatible format
    grid = np.asarray(image_grid(image_list, width, height))
Example #14
def main():
    if request.method == "POST":
        if request.files:
            binaryImage = request.files["image"].read()
            pil_img = readimage(binaryImage)
            img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

            SCALE_PERCENT = 40
            width = int(img.shape[1] * SCALE_PERCENT / 100)
            height = int(img.shape[0] * SCALE_PERCENT / 100)

            img = cv2.resize(img, (width, height))

            imgContours = img.copy()
            imgWithContourPoints = img.copy()

            imgGray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1)
            imgCanny = cv2.Canny(imgBlur, 10, 50)

            contours, hierarchy = cv2.findContours(imgCanny, cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
            cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 1)

            rects = rectContour(contours)

            firstChoiceAreaContour = getCornerPoints(rects[1])
            secondChoiceAreaContour = getCornerPoints(rects[0])
            studentIdContour = getCornerPoints(rects[4])
            examIdContour = getCornerPoints(rects[5])

            firstChoiceAreaContour = reorder(firstChoiceAreaContour)
            secondChoiceAreaContour = reorder(secondChoiceAreaContour)
            studentIdContour = reorder(studentIdContour)
            examIdContour = reorder(examIdContour)

            firstChoiceAreaImage = separateImageArea(img,
                                                     firstChoiceAreaContour,
                                                     175 * 3, 455 * 3)
            secondChoiceAreaImage = separateImageArea(img,
                                                      secondChoiceAreaContour,
                                                      175 * 3, 455 * 3)
            studentIdAreaImage = separateImageArea(img, studentIdContour,
                                                   80 * 2, 190 * 2)
            examIdAreaImage = separateImageArea(img, examIdContour, 40 * 5,
                                                190 * 5)

            examIdGray = cv2.cvtColor(examIdAreaImage, cv2.COLOR_RGB2GRAY)
            examIdThresh = cv2.threshold(
                examIdGray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

            studentIdGray = cv2.cvtColor(studentIdAreaImage,
                                         cv2.COLOR_RGB2GRAY)
            studentIdThresh = cv2.threshold(
                studentIdGray, 0, 255,
                cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

            firstChoiceAreaGray = cv2.cvtColor(firstChoiceAreaImage,
                                               cv2.COLOR_RGB2GRAY)
            firstChoiceAreaThresh = cv2.threshold(
                firstChoiceAreaGray, 0, 255,
                cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

            secondChoiceAreaGray = cv2.cvtColor(secondChoiceAreaImage,
                                                cv2.COLOR_RGB2GRAY)
            secondChoiceAreaThresh = cv2.threshold(
                secondChoiceAreaGray, 0, 255,
                cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

            # result
            examId = getExamIdValue(examIdThresh)
            studentId = getStudentIdValue(studentIdThresh)
            studentChoices = getChoiceValue(
                firstChoiceAreaThresh) + getChoiceValue(secondChoiceAreaThresh)

            return {
                "examId": examId,
                "studentId": studentId,
                "values": studentChoices
            }
Example #15
imgCanny = cv2.Canny(imgBlur, 10, 50)


contours, hierarchy = cv2.findContours(imgCanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 5)


rectCon = utils.rectContour(contours)
biggestContour = utils.getCornerPoints(rectCon[0])
#print(biggestContour.shape)


if biggestContour.size != 0:
  cv2.drawContours(imgBiggestContours, biggestContour, -1, (0,255,0), 10)
  
  biggestContour = utils.reorder(biggestContour)
  
  pt1 = np.float32(biggestContour)
  pt2 = np.float32([[0,0],[width,0],[0,height],[width,height]])
  matrix = cv2.getPerspectiveTransform(pt1,pt2)
  imgWarpColored = cv2.warpPerspective(img,matrix,(width,height))
  
  imgWarpGray = cv2.cvtColor(imgWarpColored,cv2.COLOR_BGR2GRAY)
  imgThresh = cv2.threshold(imgWarpGray, 170, 255,cv2.THRESH_BINARY_INV )[1]
  
  #print(imgThresh)
  
  utils.splitBoxes(imgThresh)